Merge pull request #606 from paulfantom/collect-all

Restructure collecting of prometheus metrics and bump dependencies
timescale · Oct 19, 2022 · 240a7fe · 240a7fe
2 parents 5dab2d2 + d7e08d0
commit 240a7fe
Show file tree

Hide file tree

Showing 6 changed files with 70 additions and 13 deletions.
diff --git a/Makefile b/Makefile
@@ -80,7 +80,6 @@ timescaledb: ## This is a phony target that is used to install the timescaledb-s
 		timescaledb/timescaledb-single \
 		--namespace=timescaledb \
 		--set replicaCount=1 \
-		--set loadBalancer.enabled=false \
 		--set secrets.credentials.PATRONI_SUPERUSER_PASSWORD="temporarypassword" \
 		--set secrets.credentials.PATRONI_admin_PASSWORD="temporarypassword" \
 		--set patroni.log.level=INFO

diff --git a/chart/Chart.yaml b/chart/Chart.yaml
@@ -12,24 +12,23 @@ keywords:
   - monitoring
   - tracing
   - opentelemetry
-version: 16.3.0
+version: 17.0.0
 # TODO(paulfantom): Enable after kubernetes 1.22 reaches EOL (2022-10-28)
 # kubeVersion: ">= 1.23.0"
 dependencies:
   - name: timescaledb-single
     condition: timescaledb-single.enabled
-    version: 0.19.0
+    version: 0.20.0
     repository: https://charts.timescale.com
   - name: promscale
     condition: promscale.enabled
-    version: 14.2.0
+    version: 14.3.0
     repository: https://charts.timescale.com
   - name: kube-prometheus-stack
     condition: kube-prometheus-stack.enabled
-    # With each major bump make sure prometheus-operator CRD version matches version specified in cli/pkg/upgrade/upgrade.go#KubePrometheusCRDVersion
-    version: 40.5.0
+    version: 41.4.1
     repository: https://prometheus-community.github.io/helm-charts
   - name: opentelemetry-operator
     condition: opentelemetry-operator.enabled
-    version: 0.13.3
+    version: 0.14.0
     repository: https://open-telemetry.github.io/opentelemetry-helm-charts
diff --git a/chart/README.md b/chart/README.md
@@ -186,7 +186,6 @@ The chart has the following properties in the `values.yaml` file:
 | ------------------------------------------------ | ------------------------------------------------- | ------------------- |
 | `timescaledb-single.enabled`                     | If false TimescaleDB will not be created          | `true`              |
 | `timescaledb-single.image.tag`                   | Docker image tag to use for TimescaleDB           | `pg14.4-ts2.7.2-p0` |
-| `timescaledb-single.loadBalancer.enabled`        | Create a LB for the DB instead of a ClusterIP     | `false`             |
 | `timescaledb-single.replicaCount`                | Number of pods for DB, set to 3 for HA            | `1`                 |
 | `timescaledb-single.backup.enabled`              | TimescaleDB backup option by default set to false | `false`             |
 | `timescaledb-single.persistentVolumes.data.size` | Size of the volume for the database               | `150Gi`             |

diff --git a/chart/scripts/test-metrics.sh b/chart/scripts/test-metrics.sh
@@ -40,7 +40,7 @@ EOF
 )
 genericTests=$(cat <<-EOF
 {
-  "expression": "up{namespace=\"$NAMESPACE\"}==0",
+  "expression": "up{namespace=\"$NAMESPACE\",pod!~\".*-grafana-test\"}==0",
   "expected": false
 }
 EOF

diff --git a/chart/values.yaml b/chart/values.yaml
@@ -18,9 +18,6 @@ timescaledb-single:
   env:
     - name: TSTUNE_PROFILE
       value: promscale
-  # create only a ClusterIP service
-  loadBalancer:
-    enabled: false
   # number or TimescaleDB pods to spawn (default is 3, 1 for no HA)
   replicaCount: 1
   # backup is disabled by default, enable it
@@ -157,6 +154,45 @@ kube-prometheus-stack:
       replicaExternalLabelName: "__replica__"
       # Promscale requires a cluster label to be present for high availability mode.
       prometheusExternalLabelName: "cluster"
+
+      # By default collect all PrometheusRules and scrape configuration from the cluster
+      # Exclude all resources labeled with `tobs/excluded`
+      ruleSelectorNilUsesHelmValues: false
+      ruleNamespaceSelector:
+        matchExpressions:
+        - key: tobs/excluded
+          operator: DoesNotExist
+      ruleSelector:
+        matchExpressions:
+        - key: tobs/excluded
+          operator: DoesNotExist
+      serviceMonitorSelectorNilUsesHelmValues: false
+      serviceMonitorSelector:
+        matchExpressions:
+        - key: tobs/excluded
+          operator: DoesNotExist
+      serviceMonitorNamespaceSelector:
+        matchExpressions:
+        - key: tobs/excluded
+          operator: DoesNotExist
+      podMonitorSelectorNilUsesHelmValues: false
+      podMonitorSelector:
+        matchExpressions:
+        - key: tobs/excluded
+          operator: DoesNotExist
+      podMonitorNamespaceSelector:
+        matchExpressions:
+        - key: tobs/excluded
+          operator: DoesNotExist
+      probeSelectorNilUsesHelmValues: false
+      probeSelector:
+        matchExpressions:
+        - key: tobs/excluded
+          operator: DoesNotExist
+      probeNamespaceSelector:
+        matchExpressions:
+        - key: tobs/excluded
+          operator: DoesNotExist
       # The remote_read spec configuration for Prometheus.
       # ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#remotereadspec
       remoteRead:
@@ -198,7 +234,7 @@ kube-prometheus-stack:
       # Expected values are the secret name and key
       # Cannot be used with additionalScrapeConfigs
       additionalScrapeConfigsSecret:
-        enabled: true
+        enabled: false
         name: tobs-scrape-config
         key: additional-scrape-config.yaml
   # Values for configuring the deployment of Grafana

diff --git a/docs/upgrades.md b/docs/upgrades.md
@@ -8,6 +8,30 @@ Firstly upgrade the helm repo to pull the latest available tobs helm chart. We a
 helm repo update
 ```
 
+## Upgrading from 16.x to 17.x
+
+With `17.0.0` we decided to stop gathering metrics data only from the
+namespace in which tobs is deployed and to extend collection to all namespaces.
+To accomplish this we changed the default kube-prometheus-stack selectors to
+gather all prometheus-operator resources that are not labeled with
+`tobs/excluded` (the label value doesn't matter). If you have any other
+prometheus-operator resources in your cluster that you don't want to be scraped
+by tobs, you need to label them with the `tobs/excluded` label.
+
+Additionally, to prevent data duplication, we are disabling by default the
+ability to scrape endpoints using prometheus label annotations. If you wish
+to continue using this option, you need to explicitly set the following
+option:
+```yaml
+kube-prometheus-stack:
+  prometheus:
+    prometheusSpec:
+      additionalScrapeConfigsSecret:
+        enabled: true
+```
+
+In `17.0.0` we are also updating the timescaledb-single chart to version `0.20.0`, which by default uses a `ClusterIP` service instead of a `LoadBalancer` service. This change removes the `timescaledb-single.service.loadBalancerIP` field.
+
 ## Upgrading from 15.x to 16.x
 
 With `16.0.0` we removed `grafana-db-sec.yaml` generated Secret as it's no