From d9bf7728486dc61faab9e85119d241b84d4e080e Mon Sep 17 00:00:00 2001
From: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com>
Date: Sun, 8 Sep 2024 10:36:17 -0700
Subject: [PATCH 1/4] Update CortexProvisioningTooManyActiveSeries to 3.2M
 series per ingester (#59)

* Update CortexProvisioningTooManyActiveSeries to 3.2M series per ingester

Signed-off-by: Friedrich Gonzalez <friedrichg@gmail.com>

* Adjust more things

Signed-off-by: Friedrich Gonzalez <friedrichg@gmail.com>

* Update CHANGELOG.md

---------

Signed-off-by: Friedrich Gonzalez <friedrichg@gmail.com>
Signed-off-by: Narsing Metpally <nmetpally@axon.com>
---
 CHANGELOG.md                           | 1 +
 cortex-mixin/alerts/alerts.libsonnet   | 6 +++---
 cortex-mixin/docs/playbooks.md         | 6 +++---
 cortex-mixin/recording_rules.libsonnet | 6 +++---
 4 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3e0206ac..e6895ec1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
 * [CHANGE] Use cortex v1.17.1
 * [CHANGE] Enable shuffle sharding in compactors
 * [CHANGE] Remove chunks support for dashboards
+* [CHANGE] Target 3M memory series per ingester instead of 1.5M
 * [CHANGE] Update jsonnet-libs to Fri Jul 19 12:51:49 2024 #57
 * [ENHANCEMENT] Configure `-ingester.client.grpc-compression` to be `snappy-block`
 * [ENHANCEMENT] Support Grafana 11 in Cortex Service Scaling Dashboard
diff --git a/cortex-mixin/alerts/alerts.libsonnet b/cortex-mixin/alerts/alerts.libsonnet
index e67ef449..7145d028 100644
--- a/cortex-mixin/alerts/alerts.libsonnet
+++ b/cortex-mixin/alerts/alerts.libsonnet
@@ -389,11 +389,11 @@
       rules: [
         {
           alert: 'CortexProvisioningTooManyActiveSeries',
-          // We target each ingester to 1.5M in-memory series. This alert fires if the average
-          // number of series / ingester in a Cortex cluster is > 1.6M for 2h (we compact
+          // We target each ingester to 3.0M in-memory series. This alert fires if the average
+          // number of series / ingester in a Cortex cluster is > 3.2M for 2h (we compact
           // the TSDB head every 2h).
           expr: |||
-            avg by (%s) (cortex_ingester_memory_series) > 1.6e6
+            avg by (%s) (cortex_ingester_memory_series) > 3.2e6
           ||| % [$._config.alert_aggregation_labels],
           'for': '2h',
           labels: {
diff --git a/cortex-mixin/docs/playbooks.md b/cortex-mixin/docs/playbooks.md
index b5b68895..39586870 100644
--- a/cortex-mixin/docs/playbooks.md
+++ b/cortex-mixin/docs/playbooks.md
@@ -555,13 +555,13 @@ How to **investigate**:
 
 ### CortexProvisioningTooManyActiveSeries
 
-This alert fires if the average number of in-memory series per ingester is above our target (1.5M).
+This alert fires if the average number of in-memory series per ingester is above our target (3.0M).
 
 How to **fix**:
 - Scale up ingesters
   - To find out the Cortex clusters where ingesters should be scaled up and how many minimum replicas are expected:
     ```
-    ceil(sum by(cluster, namespace) (cortex_ingester_memory_series) / 1.5e6) >
+    ceil(sum by(cluster, namespace) (cortex_ingester_memory_series) / 3.0e6) >
     count by(cluster, namespace) (cortex_ingester_memory_series)
     ```
 - After the scale up, the in-memory series are expected to be reduced at the next TSDB head compaction (occurring every 2h)
@@ -595,7 +595,7 @@ How to **fix**:
     kubectl -n <namespace> delete pod ingester-XXX
     ```
   - Restarting an ingester typically reduces the memory allocated by mmap-ed files. After the restart, ingester may allocate this memory again over time, but it may give more time while working on a longer term solution
-- Check the `Cortex / Writes Resources` dashboard to see if the number of series per ingester is above the target (1.5M). If so:
+- Check the `Cortex / Writes Resources` dashboard to see if the number of series per ingester is above the target (3.0M). If so:
   - Scale up ingesters
   - Memory is expected to be reclaimed at the next TSDB head compaction (occurring every 2h)
 
diff --git a/cortex-mixin/recording_rules.libsonnet b/cortex-mixin/recording_rules.libsonnet
index 03835247..86650fa5 100644
--- a/cortex-mixin/recording_rules.libsonnet
+++ b/cortex-mixin/recording_rules.libsonnet
@@ -2,7 +2,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
 
 {
   local _config = {
-    max_series_per_ingester: 1.5e6,
+    max_series_per_ingester: 3.0e6,
     max_samples_per_sec_per_ingester: 80e3,
     max_samples_per_sec_per_distributor: 240e3,
     limit_utilisation_target: 0.6,
@@ -148,7 +148,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
             ||| % _config,
           },
           {
-            // Ingester should have 1.5M series in memory
+            // Ingester should have 3.0M series in memory
             record: 'cluster_namespace_deployment_reason:required_replicas:count',
             labels: {
               deployment: 'ingester',
@@ -167,7 +167,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
           },
           {
             // We should be about to cover 60% of our limits,
-            // and ingester can have 1.5M series in memory
+            // and ingester can have 3.0M series in memory
             record: 'cluster_namespace_deployment_reason:required_replicas:count',
             labels: {
               deployment: 'ingester',

From 727addc452387560290b9050b76c89b8d1c4bd46 Mon Sep 17 00:00:00 2001
From: Narsing Metpally <nmetpally@axon.com>
Date: Tue, 24 Sep 2024 12:39:10 -0600
Subject: [PATCH 2/4] Increase CortexProvisioningTooManyWrites alert threshold
 to 160k

Signed-off-by: Narsing Metpally <nmetpally@axon.com>
---
 cortex-mixin/alerts/alerts.libsonnet | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cortex-mixin/alerts/alerts.libsonnet b/cortex-mixin/alerts/alerts.libsonnet
index 7145d028..9abe1ba2 100644
--- a/cortex-mixin/alerts/alerts.libsonnet
+++ b/cortex-mixin/alerts/alerts.libsonnet
@@ -409,7 +409,7 @@
           alert: 'CortexProvisioningTooManyWrites',
           // 80k writes / s per ingester max.
           expr: |||
-            avg by (%s) (rate(cortex_ingester_ingested_samples_total[1m])) > 80e3
+            avg by (%s) (rate(cortex_ingester_ingested_samples_total[1m])) > 160e3
           ||| % $._config.alert_aggregation_labels,
           'for': '15m',
           labels: {

From 9acc4874af959928ed107d9c4a8481f908274a6f Mon Sep 17 00:00:00 2001
From: Narsing Metpally <nmetpally@axon.com>
Date: Wed, 25 Sep 2024 12:35:12 -0600
Subject: [PATCH 3/4] updating changelog

Signed-off-by: Narsing Metpally <nmetpally@axon.com>
---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e6895ec1..74252e88 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@
 * [CHANGE] Remove chunks support for dashboards
 * [CHANGE] Target 3M memory series per ingester instead of 1.5M
 * [CHANGE] Update jsonnet-libs to Fri Jul 19 12:51:49 2024 #57
+* [CHANGE] Increase CortexProvisioningTooManyWrites alert threshold from 80k to 160k samples/s per ingester
 * [ENHANCEMENT] Configure `-ingester.client.grpc-compression` to be `snappy-block`
 * [ENHANCEMENT] Support Grafana 11 in Cortex Service Scaling Dashboard
 

From f3b61810674a0cecdf9c7edc9314ba5b3e7d2af7 Mon Sep 17 00:00:00 2001
From: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com>
Date: Wed, 25 Sep 2024 11:42:12 -0700
Subject: [PATCH 4/4] fix comment

---
 cortex-mixin/alerts/alerts.libsonnet | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cortex-mixin/alerts/alerts.libsonnet b/cortex-mixin/alerts/alerts.libsonnet
index 9abe1ba2..ec44565c 100644
--- a/cortex-mixin/alerts/alerts.libsonnet
+++ b/cortex-mixin/alerts/alerts.libsonnet
@@ -407,7 +407,7 @@
         },
         {
           alert: 'CortexProvisioningTooManyWrites',
-          // 80k writes / s per ingester max.
+          // 160k samples / s per ingester max.
           expr: |||
             avg by (%s) (rate(cortex_ingester_ingested_samples_total[1m])) > 160e3
           ||| % $._config.alert_aggregation_labels,