Skip to content

Commit

Permalink
S3UTILS-158: monitoring dashboards introduction (might need to be dro…
Browse files Browse the repository at this point in the history
…pped until next PR)
  • Loading branch information
benzekrimaha committed May 13, 2024
1 parent 6876b58 commit 9103f57
Show file tree
Hide file tree
Showing 3 changed files with 314 additions and 0 deletions.
211 changes: 211 additions & 0 deletions monitoring/dashboard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
{
"__inputs": [
{
"description": "",
"label": "Prometheus",
"name": "DS_PROMETHEUS",
"pluginId": "prometheus",
"pluginName": "Prometheus",
"type": "datasource"
},
{
"description": "",
"label": "Loki",
"name": "DS_LOKI",
"pluginId": "loki",
"pluginName": "Loki",
"type": "datasource"
},
{
"description": "Namespace associated with the Zenko instance",
"label": "namespace",
"name": "namespace",
"type": "constant",
"value": "zenko"
},
{
"description": "Name of the S3utils job, used to filter the metrics.",
"label": "job",
"name": "job",
"type": "constant",
"value": "artesca-data-ops-count-items-metrics"
},
{
"description": "Prefix of the cronjob pod name, used to filter only the cronjob instances.",
"label": "pod",
"name": "pod",
"type": "constant",
"value": "artesca-data-ops-count-items"
}
],
"annotations": {
"list": []
},
"description": "",
"editable": true,
"gnetId": null,
"hideControls": false,
"id": null,
"links": [],
"panels": [
{
"collapsed": false,
"editable": true,
"error": false,
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": []
}
}
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"hideTimeOverride": false,
"id": 1,
"links": [],
"maxDataPoints": 100,
"panels": [],
"targets": [],
"title": "Processing Duration",
"transformations": [],
"transparent": false,
"type": "row"
},
{
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "smooth",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"log": 2,
"type": "linear"
},
"showPoints": "auto",
"spanNulls": 180000,
"stacking": {},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": []
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 1
},
"hideTimeOverride": false,
"id": 2,
"links": [],
"maxDataPoints": 100,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"datasource": null,
"expr": "sum(rate(count_items_bucketProcessingDuration_count{namespace=\"${namespace}\", job=~\"${job}\"}[$__rate_interval]))",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{namespace}} - {{job}}",
"metric": "",
"refId": "",
"step": 10,
"target": ""
}
],
"title": "bucket count duration",
"transformations": [],
"transparent": false,
"type": "timeseries"
}
],
"refresh": "30s",
"rows": [],
"schemaVersion": 12,
"sharedCrosshair": false,
"style": "dark",
"tags": [
"S3Utils"
],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"hidden": false,
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "S3Utils service",
"uid": null,
"version": 0
}
101 changes: 101 additions & 0 deletions monitoring/dashboard.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from grafanalib.core import (
ConstantInput,
DataSourceInput,
Heatmap,
HeatmapColor,
HIDE_VARIABLE,
RowPanel,
Stat,
Template,
Templating,
Threshold,
YAxis,
)
from grafanalib import formatunits as UNITS
from scalgrafanalib import (
layout,
BarGauge,
Dashboard,
GaugePanel,
PieChart,
Tooltip,
Target,
TimeSeries
)

# Time-series panel shown in the "Processing Duration" row: plots the summed
# per-second rate of the count_items_bucketProcessingDuration_count series,
# filtered by the ${namespace} and ${job} dashboard inputs.
# NOTE(review): the panel is titled "duration" and uses a seconds unit, but
# the query is a rate over a `_count` series -- confirm whether a
# `_sum` / `_count` ratio was intended.
bucketCountDuration = TimeSeries(
    title="bucket count duration",
    dataSource="${DS_PROMETHEUS}",
    lineInterpolation="smooth",
    # 3 * 60 * 1000 = 180000; presumably a 3-minute gap threshold expressed
    # in milliseconds -- confirm against the Grafana spanNulls option.
    spanNulls=3*60*1000,
    unit=UNITS.SECONDS,
    targets=[Target(
        # Parentheses must balance as sum(rate(<selector>[<range>])); a
        # stray trailing ")" makes the PromQL expression unparsable.
        expr='sum(rate(count_items_bucketProcessingDuration_count{namespace="${namespace}", job=~"${job}"}[$__rate_interval]))',
        legendFormat='{{namespace}} - {{job}}'
    )],
)

# Time-series panel plotting the summed per-second rate of the
# count_items_consolidationDuration_count series, filtered by the
# ${namespace} and ${job} dashboard inputs.
consolidationDuration = TimeSeries(
    title="consolidation duration",
    dataSource="${DS_PROMETHEUS}",
    targets=[
        Target(
            legendFormat="{{namespace}} - {{job}}",
            # Adjacent literals concatenate into one PromQL expression.
            expr=(
                'sum(rate(count_items_consolidationDuration_count'
                '{namespace="${namespace}", job=~"${job}"}'
                '[$__rate_interval]))'
            ),
        ),
    ],
    unit=UNITS.SECONDS,
    spanNulls=180000,  # same value as 3 * 60 * 1000
    lineInterpolation="smooth",
)


# Definition of the "S3Utils service" dashboard: declares the two datasource
# inputs and three constant inputs, lays out a "Processing Duration" row
# followed by the bucketCountDuration panel, then calls auto_panel_ids() and
# verify_datasources() on the resulting Dashboard.
dashboard = Dashboard(
    title="S3Utils service",
    tags=["S3Utils"],
    timezone="",
    refresh="30s",
    editable=True,
    inputs=[
        DataSourceInput(
            name="DS_PROMETHEUS",
            label="Prometheus",
            pluginId="prometheus",
            pluginName="Prometheus",
        ),
        DataSourceInput(
            name="DS_LOKI",
            label="Loki",
            pluginId="loki",
            pluginName="Loki",
        ),
        ConstantInput(
            name="namespace",
            label="namespace",
            value="zenko",
            description="Namespace associated with the Zenko instance",
        ),
        ConstantInput(
            name="job",
            label="job",
            value="artesca-data-ops-count-items-metrics",
            description="Name of the S3utils job, used to filter the metrics.",
        ),
        ConstantInput(
            name="pod",
            label="pod",
            value="artesca-data-ops-count-items",
            description=(
                "Prefix of the cronjob pod name, "
                "used to filter only the cronjob instances."
            ),
        ),
    ],
    panels=layout.column([
        RowPanel(title="Processing Duration"),
        layout.row([bucketCountDuration], height=8),
    ]),
).auto_panel_ids().verify_datasources()


2 changes: 2 additions & 0 deletions monitoring/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
attrs==21.4.0
grafanalib==0.6.3

0 comments on commit 9103f57

Please sign in to comment.