Skip to content

Commit

Permalink
add rules
Browse files Browse the repository at this point in the history
  • Loading branch information
lhpqaq committed Dec 18, 2024
1 parent c297683 commit f96ef8a
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ global:
external_labels:
monitor: 'codelab-monitor'
# Rule files specifies a list of globs. Rules and alerts are read from
# all matching files.
rule_files:
<#-- The variable name must match the "rules_file_name" key that PrometheusParams puts into the global params map (and the property of the same name in the rules configuration). -->
<#if rules_file_name??>
- ${rules_file_name}
</#if>
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
<?xml version="1.0"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ https://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->

<configuration>
<property>
<name>rules_file_name</name>
<value>prometheus_rules.yml</value>
<description>Rules file name</description>
</property>
<property>
<name>content</name>
<description>This is the freemarker template for rules file</description>
<value><![CDATA[
groups:
# Recording rules group: Used to calculate and save new aggregated metrics
- name: example_recording_rules
interval: 1m # The frequency at which the rules are evaluated
rules:
# Recording rule: Calculate the average CPU usage over the last 5 minutes for each job
- record: job:cpu_usage:avg
expr: avg(rate(node_cpu_seconds_total{mode="user"}[5m])) by (job)
# This creates a new metric `job:cpu_usage:avg` representing the average CPU usage per job
# Alerting rules group: Used to trigger alerts based on conditions
- name: example_alerting_rules
interval: 1m # The frequency at which the alerting rules are evaluated
rules:
# Alerting rule: Trigger an alert if the average CPU usage is over 90% for the last 5 minutes
- alert: HighCpuUsage
expr: avg(rate(node_cpu_seconds_total{mode="user"}[5m])) by (instance) > 0.9
# This expression checks if the average CPU usage over the last 5 minutes for each instance is greater than 90%
for: 5m # The condition must hold true for 5 minutes before the alert is triggered
labels:
severity: critical # Set the severity of the alert as 'critical'
annotations:
summary: "CPU usage on instance {{ $labels.instance }} is over 90% for the last 5 minutes"
# Summary of the alert that will appear when it triggers
description: "The CPU usage on instance {{ $labels.instance }} has been over 90% for the past 5 minutes."
# Detailed description of the alert that will provide more context
]]>
</value>
<attrs>
<type>longtext</type>
</attrs>
</property>
</configuration>
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ public class PrometheusParams extends InfraParams {
private String prometheusPort;
private String prometheusContent;
private String prometheusScrapeInterval;
private String prometheusRulesFilename;
private String prometheusRulesFileContent;

public PrometheusParams(CommandPayload commandPayload) {
super(commandPayload);
Expand All @@ -59,6 +61,7 @@ public PrometheusParams(CommandPayload commandPayload) {
scrapeJobs.add(agentScrapeJob);
globalParamsMap.put("scrape_jobs", scrapeJobs);
globalParamsMap.put("scrape_interval", prometheusScrapeInterval);
globalParamsMap.put("rules_file_name", prometheusRulesFilename);
}

public String dataDir() {
Expand Down Expand Up @@ -89,7 +92,7 @@ protected List<String> getAllHost() {

@GlobalParams
public Map<String, Object> prometheusJob() {
Map<String, Object> configuration = LocalSettings.configurations(getServiceName(), "prometheus");
Map<String, Object> configuration = LocalSettings.configurations(getServiceName(), "prometheus-conf");
prometheusPort = (String) configuration.get("port");
Map<String, Object> job = new HashMap<>();
job.put("name", PROMETHEUS_SELF_JOB_NAME);
Expand All @@ -106,19 +109,28 @@ public Map<String, Object> agentJob() {
job.put("targets_file", targetsConfigFile(BM_AGENT_JOB_NAME));
job.put("targets_list", getAllHost());
agentScrapeJob = job;
return LocalSettings.configurations(getServiceName(), "prometheus");
return LocalSettings.configurations(getServiceName(), "prometheus-conf");
}

@GlobalParams
public Map<String, Object> configs() {
Map<String, Object> configuration = LocalSettings.configurations(getServiceName(), "prometheus");
Map<String, Object> configuration = LocalSettings.configurations(getServiceName(), "prometheus-conf");

prometheusContent = (String) configuration.get("content");
prometheusScrapeInterval = (String) configuration.get("scrape_interval");
return configuration;
}

public Object listenAddress() {
/**
 * Loads the "prometheus-rule" configuration of this service and caches the
 * rules file name and the rules file template content in this object's fields.
 *
 * NOTE(review): the map returned here and the one returned by configs() both
 * carry a "content" key; if the @GlobalParams results are merged into a single
 * map, one entry would shadow the other - confirm against the base class.
 *
 * @return the "prometheus-rule" configuration map as returned by LocalSettings
 */
@GlobalParams
public Map<String, Object> rules() {
    final Map<String, Object> ruleSettings =
            LocalSettings.configurations(getServiceName(), "prometheus-rule");

    // Template body of the rules file.
    prometheusRulesFileContent = (String) ruleSettings.get("content");
    // Name of the rules file.
    prometheusRulesFilename = (String) ruleSettings.get("rules_file_name");

    return ruleSettings;
}

/**
 * Builds the listen address by substituting the cached Prometheus port into
 * the pattern "0.0.0.0:{0}".
 *
 * The port is the value cached in prometheusPort, which prometheusJob() reads
 * from the "port" configuration entry.
 *
 * @return the listen address in "0.0.0.0:port" form
 */
public String listenAddress() {
    final String addressPattern = "0.0.0.0:{0}";
    final Object[] patternArgs = {prometheusPort};
    return MessageFormat.format(addressPattern, patternArgs);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@ public static ShellResult config(Params params) {
Constants.PERMISSION_644,
prometheusParams.getGlobalParamsMap());

LinuxFileUtils.toFileByTemplate(
prometheusParams.getPrometheusRulesFileContent(),
MessageFormat.format(
"{0}/{1}", prometheusParams.confDir(), prometheusParams.getPrometheusRulesFilename()),
user,
group,
Constants.PERMISSION_644,
prometheusParams.getGlobalParamsMap());

for (int i = 0; i < prometheusParams.getScrapeJobs().size(); i++) {
Map<String, Object> job = prometheusParams.getScrapeJobs().get(i);
Map<String, List<String>> targets = new HashMap<>();
Expand Down

0 comments on commit f96ef8a

Please sign in to comment.