From 3829a81f0a151e05dbcbd49b7bd6a052c07f4330 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Mon, 31 Jul 2023 17:35:01 +0200 Subject: [PATCH 01/15] Add default tenant shard sizes (#31) Signed-off-by: Friedrich Gonzalez --- CHANGELOG.md | 1 + cortex/config.libsonnet | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7aefd62..d106e37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## master / unreleased +* [CHANGE] Add default tenant shard sizes * [CHANGE] Use cortex v1.15.3 * [CHANGE] Azure storage endpoint suffix is set to `blob.core.windows.net` for backward compatibility diff --git a/cortex/config.libsonnet b/cortex/config.libsonnet index 5eb93eb..39b50ee 100644 --- a/cortex/config.libsonnet +++ b/cortex/config.libsonnet @@ -260,6 +260,8 @@ // No retention for now. compactor_blocks_retention_period: '0', + + ingestion_tenant_shard_size: 3, }, medium_small_user:: { @@ -277,6 +279,8 @@ // 1000 rules ruler_max_rules_per_rule_group: 20, ruler_max_rule_groups_per_tenant: 50, + + ingestion_tenant_shard_size: 9, }, small_user:: { @@ -294,6 +298,8 @@ // 1400 rules ruler_max_rules_per_rule_group: 20, ruler_max_rule_groups_per_tenant: 70, + + ingestion_tenant_shard_size: 15, }, medium_user:: { @@ -311,6 +317,8 @@ // 1800 rules ruler_max_rules_per_rule_group: 20, ruler_max_rule_groups_per_tenant: 90, + + ingestion_tenant_shard_size: 30, }, big_user:: { @@ -328,6 +336,8 @@ // 2200 rules ruler_max_rules_per_rule_group: 20, ruler_max_rule_groups_per_tenant: 110, + + ingestion_tenant_shard_size: 60, }, super_user:: { @@ -345,6 +355,8 @@ // 2600 rules ruler_max_rules_per_rule_group: 20, ruler_max_rule_groups_per_tenant: 130, + + ingestion_tenant_shard_size: 120, }, // This user class has limits increased by +50% compared to the previous one. 
@@ -363,6 +375,8 @@ // 3000 rules ruler_max_rules_per_rule_group: 20, ruler_max_rule_groups_per_tenant: 150, + + ingestion_tenant_shard_size: 180, }, }, From cb68f902e53a9606479b9c2ba8fbc1df7ba919f3 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Wed, 9 Aug 2023 10:53:18 +0200 Subject: [PATCH 02/15] Configure default GOMAXPROCS and GOMEMLIMIT (#32) Also remove mem-ballast, that is not required if using GOMEMLIMIT Signed-off-by: Friedrich Gonzalez --- CHANGELOG.md | 3 +++ cortex/alertmanager.libsonnet | 6 ++++++ cortex/compactor.libsonnet | 6 ++++++ cortex/distributor.libsonnet | 11 ++++++----- cortex/flusher-job-blocks.libsonnet | 6 ++++++ cortex/ingester.libsonnet | 28 +++++++--------------------- cortex/querier.libsonnet | 8 +++----- cortex/query-frontend.libsonnet | 6 ++++++ cortex/query-scheduler.libsonnet | 6 ++++++ cortex/query-tee.libsonnet | 6 ++++++ cortex/ruler.libsonnet | 6 ++++++ cortex/store-gateway.libsonnet | 6 ++++++ cortex/test-exporter.libsonnet | 6 ++++++ 13 files changed, 73 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d106e37..c84974e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ # Changelog ## master / unreleased +* [CHANGE] Remove mem-ballast from distributor and querier. +* [CHANGE] Increase cpu requests for querier to 2. 
+* [CHANGE] Configure default GOMAXPROCS and GOMEMLIMIT for all cortex modules * [CHANGE] Add default tenant shard sizes * [CHANGE] Use cortex v1.15.3 * [CHANGE] Azure storage endpoint suffix is set to `blob.core.windows.net` for backward compatibility diff --git a/cortex/alertmanager.libsonnet b/cortex/alertmanager.libsonnet index 480112d..4df2e77 100644 --- a/cortex/alertmanager.libsonnet +++ b/cortex/alertmanager.libsonnet @@ -96,6 +96,7 @@ if $._config.alertmanager_enabled then container.new('alertmanager', $._images.alertmanager) + container.withPorts($.util.defaultPorts + mode.ports) + + container.withEnvMap($.alertmanager_env_map) + container.withEnvMixin([container.envType.fromFieldPath('POD_IP', 'status.podIP')]) + container.withArgsMixin( $.util.mapToFlags($.alertmanager_args) + @@ -112,6 +113,11 @@ $.jaeger_mixin else {}, + alertmanager_env_map:: { + GOMAXPROCS: '1', + GOMEMLIMIT: '1GiB', + }, + alertmanager_statefulset: if $._config.alertmanager_enabled then statefulSet.new('alertmanager', $._config.alertmanager.replicas, [$.alertmanager_container], $.alertmanager_pvc) + diff --git a/cortex/compactor.libsonnet b/cortex/compactor.libsonnet index 03df1ab..9edfcdc 100644 --- a/cortex/compactor.libsonnet +++ b/cortex/compactor.libsonnet @@ -43,6 +43,7 @@ container.new('compactor', $._images.compactor) + container.withPorts($.compactor_ports) + container.withArgsMixin($.util.mapToFlags($.compactor_args)) + + container.withEnvMap($.compactor_env_map) + container.withVolumeMountsMixin([volumeMount.new('compactor-data', '/data')]) + // Do not limit compactor CPU and request enough cores to honor configured max concurrency. 
$.util.resourcesRequests($._config.cortex_compactor_max_concurrency, '6Gi') + @@ -50,6 +51,11 @@ $.util.readinessProbe + $.jaeger_mixin, + compactor_env_map:: { + GOMAXPROCS: std.toString($._config.cortex_compactor_max_concurrency), + GOMEMLIMIT: '6GiB', + }, + newCompactorStatefulSet(name, container):: statefulSet.new(name, 1, [container], compactor_data_pvc) + statefulSet.mixin.spec.withServiceName(name) + diff --git a/cortex/distributor.libsonnet b/cortex/distributor.libsonnet index c2bcfe0..86a17e2 100644 --- a/cortex/distributor.libsonnet +++ b/cortex/distributor.libsonnet @@ -20,11 +20,6 @@ 'distributor.ha-tracker.etcd.endpoints': 'etcd-client.%s.svc.cluster.local.:2379' % $._config.namespace, 'distributor.ha-tracker.prefix': 'prom_ha/', - // The memory requests are 2G, and we barely use 100M. - // By adding a ballast of 1G, we can drastically reduce GC, but also keep the usage at - // around 1.25G, reducing the 99%ile. - 'mem-ballast-size-bytes': 1 << 30, // 1GB - 'server.grpc.keepalive.max-connection-age': '2m', 'server.grpc.keepalive.max-connection-age-grace': '5m', 'server.grpc.keepalive.max-connection-idle': '1m', @@ -38,12 +33,18 @@ 'distributor.extend-writes': $._config.unregister_ingesters_on_shutdown, }, + distributor_env_map:: { + GOMAXPROCS: '2', + GOMEMLIMIT: '2GiB', + }, + distributor_ports:: $.util.defaultPorts, distributor_container:: container.new('distributor', $._images.distributor) + container.withPorts($.distributor_ports) + container.withArgsMixin($.util.mapToFlags($.distributor_args)) + + container.withEnvMap($.distributor_env_map) + $.util.resourcesRequests('2', '2Gi') + $.util.resourcesLimits(null, '4Gi') + $.util.readinessProbe + diff --git a/cortex/flusher-job-blocks.libsonnet b/cortex/flusher-job-blocks.libsonnet index 1e6266c..6917a86 100644 --- a/cortex/flusher-job-blocks.libsonnet +++ b/cortex/flusher-job-blocks.libsonnet @@ -21,11 +21,17 @@ target: 'flusher', 'blocks-storage.tsdb.retention-period': '10000h', // don't delete old 
blocks too soon. })) + + container.withEnvMap($.flusher_env_map) + $.util.resourcesRequests('4', '15Gi') + $.util.resourcesLimits(null, '25Gi') + $.util.readinessProbe + $.jaeger_mixin, + flusher_env_map:: { + GOMAXPROCS: '4', + GOMEMLIMIT: '15GiB', + }, + flusher_job_func(jobName, pvcName):: job.new() + job.mixin.spec.template.spec.withContainers([ diff --git a/cortex/ingester.libsonnet b/cortex/ingester.libsonnet index 818716e..7994589 100644 --- a/cortex/ingester.libsonnet +++ b/cortex/ingester.libsonnet @@ -3,6 +3,7 @@ local pvc = $.core.v1.persistentVolumeClaim, local statefulSet = $.apps.v1.statefulSet, local volume = $.core.v1.volume, + local volumeMount = $.core.v1.volumeMount, // The ingesters should persist TSDB blocks and WAL on a persistent // volume in order to be crash resilient. @@ -44,18 +45,6 @@ 'ingester.tokens-file-path': '/data/tokens', }, - ingester_statefulset_args:: - $._config.grpcConfig - { - 'ingester.wal-enabled': true, - 'ingester.checkpoint-enabled': true, - 'ingester.recover-from-wal': true, - 'ingester.wal-dir': $._config.ingester.wal_dir, - 'ingester.checkpoint-duration': '15m', - '-log.level': 'info', - 'ingester.tokens-file-path': $._config.ingester.wal_dir + '/tokens', - }, - ingester_ports:: $.util.defaultPorts, local name = 'ingester', @@ -65,22 +54,19 @@ container.new(name, $._images.ingester) + container.withPorts($.ingester_ports) + container.withArgsMixin($.util.mapToFlags($.ingester_args)) + + container.withEnvMap($.ingester_env_map) + $.util.resourcesRequests('4', '15Gi') + $.util.resourcesLimits(null, '25Gi') + $.util.readinessProbe + $.jaeger_mixin, - local volumeMount = $.core.v1.volumeMount, - - ingester_statefulset_container:: - $.ingester_container + - container.withArgsMixin($.util.mapToFlags($.ingester_statefulset_args)) + - container.withVolumeMountsMixin([ - volumeMount.new('ingester-pvc', $._config.ingester.wal_dir), - ]), - ingester_deployment_labels:: {}, + ingester_env_map:: { + GOMAXPROCS: '4', + GOMEMLIMIT: 
'15GiB', + }, + local ingester_pvc = pvc.new('ingester-pvc') + pvc.mixin.spec.resources.withRequests({ storage: $._config.ingester.statefulset_disk }) + diff --git a/cortex/querier.libsonnet b/cortex/querier.libsonnet index 6ebe85f..e5cb82e 100644 --- a/cortex/querier.libsonnet +++ b/cortex/querier.libsonnet @@ -26,16 +26,14 @@ 'querier.frontend-address': 'query-frontend-discovery.%(namespace)s.svc.cluster.local:9095' % $._config, 'querier.frontend-client.grpc-max-send-msg-size': 100 << 20, - // We request high memory but the Go heap is typically very low (< 100MB) and this causes - // the GC to trigger continuously. Setting a ballast of 256MB reduces GC. - 'mem-ballast-size-bytes': 1 << 28, // 256M - 'log.level': 'debug', }, querier_ports:: $.util.defaultPorts, querier_env_map:: { + GOMAXPROCS: '2', + GOMEMLIMIT: '12Gi', JAEGER_REPORTER_MAX_QUEUE_SIZE: '1024', // Default is 100. }, @@ -46,7 +44,7 @@ $.jaeger_mixin + $.util.readinessProbe + container.withEnvMap($.querier_env_map) + - $.util.resourcesRequests('1', '12Gi') + + $.util.resourcesRequests('2', '12Gi') + $.util.resourcesLimits(null, '24Gi'), local deployment = $.apps.v1.deployment, diff --git a/cortex/query-frontend.libsonnet b/cortex/query-frontend.libsonnet index 80f36d0..39d4f6d 100644 --- a/cortex/query-frontend.libsonnet +++ b/cortex/query-frontend.libsonnet @@ -42,11 +42,17 @@ container.new('query-frontend', $._images.query_frontend) + container.withPorts($.util.defaultPorts) + container.withArgsMixin($.util.mapToFlags($.query_frontend_args)) + + container.withEnvMap($.query_frontend_env_map) + $.jaeger_mixin + $.util.readinessProbe + $.util.resourcesRequests('2', '600Mi') + $.util.resourcesLimits(null, '1200Mi'), + query_frontend_env_map:: { + GOMAXPROCS: '2', + GOMEMLIMIT: '600MiB', + }, + local deployment = $.apps.v1.deployment, newQueryFrontendDeployment(name, container):: diff --git a/cortex/query-scheduler.libsonnet b/cortex/query-scheduler.libsonnet index 604d258..b0a60a5 100644 --- 
a/cortex/query-scheduler.libsonnet +++ b/cortex/query-scheduler.libsonnet @@ -17,6 +17,7 @@ container.new('query-scheduler', $._images.query_scheduler) + container.withPorts($.util.defaultPorts) + container.withArgsMixin($.util.mapToFlags($.query_scheduler_args)) + + container.withEnvMap($.query_scheduler_env_map) + $.jaeger_mixin + $.util.readinessProbe + $.util.resourcesRequests('2', '1Gi') + @@ -30,6 +31,11 @@ deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(0) + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1), + query_scheduler_env_map:: { + GOMAXPROCS: '2', + GOMEMLIMIT: '1GiB', + }, + query_scheduler_deployment: if !$._config.query_scheduler_enabled then {} else self.newQuerySchedulerDeployment('query-scheduler', $.query_scheduler_container), diff --git a/cortex/query-tee.libsonnet b/cortex/query-tee.libsonnet index 4ac3b0a..0e1250c 100644 --- a/cortex/query-tee.libsonnet +++ b/cortex/query-tee.libsonnet @@ -18,9 +18,15 @@ containerPort.newNamed(name='http-metrics', containerPort=9900), ]) + container.withArgsMixin($.util.mapToFlags($.query_tee_args)) + + container.withEnvMap($.query_tee_env_map) + $.util.resourcesRequests('1', '512Mi') + $.jaeger_mixin, + query_tee_env_map:: { + GOMAXPROCS: '1', + GOMEMLIMIT: '512MiB', + }, + query_tee_deployment: if !($._config.query_tee_enabled) then {} else deployment.new('query-tee', 2, [$.query_tee_container]), diff --git a/cortex/ruler.libsonnet b/cortex/ruler.libsonnet index cfb0252..1688ca6 100644 --- a/cortex/ruler.libsonnet +++ b/cortex/ruler.libsonnet @@ -38,6 +38,7 @@ container.new('ruler', $._images.ruler) + container.withPorts($.util.defaultPorts) + container.withArgsMixin($.util.mapToFlags($.ruler_args)) + + container.withEnvMap($.ruler_env_map) + $.util.resourcesRequests('1', '6Gi') + $.util.resourcesLimits('16', '16Gi') + $.util.readinessProbe + @@ -56,6 +57,11 @@ $.util.configVolumeMount($._config.overrides_configmap, '/etc/cortex') else {}, + ruler_env_map:: { + GOMAXPROCS: '2', 
+ GOMEMLIMIT: '6GiB', + }, + local service = $.core.v1.service, ruler_service: diff --git a/cortex/store-gateway.libsonnet b/cortex/store-gateway.libsonnet index cea6308..757d9b6 100644 --- a/cortex/store-gateway.libsonnet +++ b/cortex/store-gateway.libsonnet @@ -40,12 +40,18 @@ container.new('store-gateway', $._images.store_gateway) + container.withPorts($.store_gateway_ports) + container.withArgsMixin($.util.mapToFlags($.store_gateway_args)) + + container.withEnvMap($.store_gateway_env_map) + container.withVolumeMountsMixin([volumeMount.new('store-gateway-data', '/data')]) + $.util.resourcesRequests('1', '12Gi') + $.util.resourcesLimits(null, '18Gi') + $.util.readinessProbe + $.jaeger_mixin, + store_gateway_env_map:: { + GOMAXPROCS: '2', + GOMEMLIMIT: '12GiB', + }, + newStoreGatewayStatefulSet(name, container):: statefulSet.new(name, 3, [container], store_gateway_data_pvc) + statefulSet.mixin.spec.withServiceName(name) + diff --git a/cortex/test-exporter.libsonnet b/cortex/test-exporter.libsonnet index 9d69abe..036d6fe 100644 --- a/cortex/test-exporter.libsonnet +++ b/cortex/test-exporter.libsonnet @@ -18,10 +18,16 @@ container.new('test-exporter', $._images.testExporter) + container.withPorts($.util.defaultPorts) + container.withArgsMixin($.util.mapToFlags($.test_exporter_args)) + + container.withEnvMap($.test_exporter_env_map) + $.util.resourcesRequests('100m', '100Mi') + $.util.resourcesLimits('100m', '100Mi') + $.jaeger_mixin, + test_exporter_env_map:: { + GOMAXPROCS: '1', + GOMEMLIMIT: '100MiB', + }, + local deployment = $.apps.v1.deployment, test_exporter_deployment: From 89d82f98ef5b1d1b721ab0dc0fb4d515e42df23e Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Fri, 25 Aug 2023 13:41:18 +0200 Subject: [PATCH 03/15] Add default instance limits for distributors and ingesters (#33) Signed-off-by: Friedrich Gonzalez --- CHANGELOG.md | 2 ++ cortex/config.libsonnet | 13 ++++++------- cortex/distributor.libsonnet | 1 + 3 files changed, 9 insertions(+), 7 
deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c84974e..67a2b4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## master / unreleased +* [CHANGE] Add default instance max series for ingesters +* [CHANGE] Add default instance max inflight pushes for distributors * [CHANGE] Remove mem-ballast from distributor and querier. * [CHANGE] Increase cpu requests for querier to 2. * [CHANGE] Configure default GOMAXPROCS and GOMEMLIMIT for all cortex modules diff --git a/cortex/config.libsonnet b/cortex/config.libsonnet index 39b50ee..bad810f 100644 --- a/cortex/config.libsonnet +++ b/cortex/config.libsonnet @@ -395,13 +395,12 @@ ingester_stream_chunks_when_using_blocks: true, // Ingester limits are put directly into runtime config, if not null. Available limits: - // ingester_instance_limits: { - // max_inflight_push_requests: 0, // Max inflight push requests per ingester. 0 = no limit. - // max_ingestion_rate: 0, // Max ingestion rate (samples/second) per ingester. 0 = no limit. - // max_series: 0, // Max number of series per ingester. 0 = no limit. - // max_tenants: 0, // Max number of tenants per ingester. 0 = no limit. - // }, - ingester_instance_limits: null, + ingester_instance_limits: { + // max_inflight_push_requests: 0, // Max inflight push requests per ingester. 0 = no limit. + // max_ingestion_rate: 0, // Max ingestion rate (samples/second) per ingester. 0 = no limit. + max_series: 4.8e+6, // Max number of series per ingester. 0 = no limit. 4.8 million is closely tied to 15Gb in requests per ingester + // max_tenants: 0, // Max number of tenants per ingester. 0 = no limit. + }, }, local configMap = $.core.v1.configMap, diff --git a/cortex/distributor.libsonnet b/cortex/distributor.libsonnet index 86a17e2..13501b6 100644 --- a/cortex/distributor.libsonnet +++ b/cortex/distributor.libsonnet @@ -31,6 +31,7 @@ // Do not extend the replication set on unhealthy (or LEAVING) ingester when "unregister on shutdown" // is set to false. 
'distributor.extend-writes': $._config.unregister_ingesters_on_shutdown, + 'distributor.instance-limits.max-inflight-push-requests': 60, //60 is very conservative to protect the distributor from OOMs }, distributor_env_map:: { From 7a667ea291158dec375cf370d77212db24714c38 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Mon, 28 Aug 2023 09:09:24 +0200 Subject: [PATCH 04/15] Decrease gomemlimit a bit to avoid running out of memory before thrashing (#34) Signed-off-by: Friedrich Gonzalez --- cortex/compactor.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cortex/compactor.libsonnet b/cortex/compactor.libsonnet index 9edfcdc..da511fd 100644 --- a/cortex/compactor.libsonnet +++ b/cortex/compactor.libsonnet @@ -53,7 +53,7 @@ compactor_env_map:: { GOMAXPROCS: std.toString($._config.cortex_compactor_max_concurrency), - GOMEMLIMIT: '6GiB', + GOMEMLIMIT: '5GiB', }, newCompactorStatefulSet(name, container):: From c35087e44f7aec30fd8ae0c78daa07a51acfbf68 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Thu, 28 Sep 2023 11:08:36 +0200 Subject: [PATCH 05/15] Fix querier GOMEMLIMIT (#35) Signed-off-by: Friedrich Gonzalez --- cortex/querier.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cortex/querier.libsonnet b/cortex/querier.libsonnet index e5cb82e..9cde3f2 100644 --- a/cortex/querier.libsonnet +++ b/cortex/querier.libsonnet @@ -33,7 +33,7 @@ querier_env_map:: { GOMAXPROCS: '2', - GOMEMLIMIT: '12Gi', + GOMEMLIMIT: '12GiB', JAEGER_REPORTER_MAX_QUEUE_SIZE: '1024', // Default is 100. 
}, From 524c3b3401fbe6efdb34339772cff63c05eeee03 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Mon, 20 Nov 2023 20:12:36 +0100 Subject: [PATCH 06/15] Configure GOMAXPROCS and GOMEMLIMIT based on requests cpu and memory Signed-off-by: Friedrich Gonzalez --- cortex/alertmanager.libsonnet | 11 ++++++++--- cortex/compactor.libsonnet | 11 ++++++++--- cortex/distributor.libsonnet | 9 +++++++-- cortex/ingester.libsonnet | 9 +++++++-- cortex/querier.libsonnet | 9 +++++++-- cortex/query-frontend.libsonnet | 9 +++++++-- cortex/query-scheduler.libsonnet | 9 +++++++-- cortex/query-tee.libsonnet | 9 +++++++-- cortex/ruler.libsonnet | 11 ++++++++--- cortex/store-gateway.libsonnet | 11 ++++++++--- 10 files changed, 74 insertions(+), 24 deletions(-) diff --git a/cortex/alertmanager.libsonnet b/cortex/alertmanager.libsonnet index 4df2e77..719ac85 100644 --- a/cortex/alertmanager.libsonnet +++ b/cortex/alertmanager.libsonnet @@ -3,6 +3,7 @@ local volumeMount = $.core.v1.volumeMount, local volume = $.core.v1.volume, local container = $.core.v1.container, + local envType = container.envType, local statefulSet = $.apps.v1.statefulSet, local service = $.core.v1.service, local configMap = $.core.v1.configMap, @@ -98,6 +99,12 @@ container.withPorts($.util.defaultPorts + mode.ports) + container.withEnvMap($.alertmanager_env_map) + container.withEnvMixin([container.envType.fromFieldPath('POD_IP', 'status.podIP')]) + + container.withEnvMixin([ + envType.withName('GOMAXPROCS') + + envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), + envType.withName('GOMEMLIMIT') + + envType.valueFrom.resourceFieldRef.withResource('requests.memory'), + ]) + container.withArgsMixin( $.util.mapToFlags($.alertmanager_args) + mode.flags @@ -108,14 +115,12 @@ [volumeMount.new('alertmanager-fallback-config', '/configs')] else [] ) + - $.util.resourcesRequests('100m', '1Gi') + + $.util.resourcesRequests('1', '1Gi') + $.util.readinessProbe + $.jaeger_mixin else {}, alertmanager_env_map:: { - 
GOMAXPROCS: '1', - GOMEMLIMIT: '1GiB', }, alertmanager_statefulset: diff --git a/cortex/compactor.libsonnet b/cortex/compactor.libsonnet index da511fd..2e78c4d 100644 --- a/cortex/compactor.libsonnet +++ b/cortex/compactor.libsonnet @@ -1,5 +1,6 @@ { local container = $.core.v1.container, + local envType = container.envType, local pvc = $.core.v1.persistentVolumeClaim, local statefulSet = $.apps.v1.statefulSet, local volumeMount = $.core.v1.volumeMount, @@ -44,16 +45,20 @@ container.withPorts($.compactor_ports) + container.withArgsMixin($.util.mapToFlags($.compactor_args)) + container.withEnvMap($.compactor_env_map) + + container.withEnvMixin([ + envType.withName('GOMAXPROCS') + + envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), + envType.withName('GOMEMLIMIT') + + envType.valueFrom.resourceFieldRef.withResource('requests.memory'), + ]) + container.withVolumeMountsMixin([volumeMount.new('compactor-data', '/data')]) + // Do not limit compactor CPU and request enough cores to honor configured max concurrency. 
- $.util.resourcesRequests($._config.cortex_compactor_max_concurrency, '6Gi') + + $.util.resourcesRequests($._config.cortex_compactor_max_concurrency, '5Gi') + $.util.resourcesLimits(null, '6Gi') + $.util.readinessProbe + $.jaeger_mixin, compactor_env_map:: { - GOMAXPROCS: std.toString($._config.cortex_compactor_max_concurrency), - GOMEMLIMIT: '5GiB', }, newCompactorStatefulSet(name, container):: diff --git a/cortex/distributor.libsonnet b/cortex/distributor.libsonnet index 13501b6..79c4033 100644 --- a/cortex/distributor.libsonnet +++ b/cortex/distributor.libsonnet @@ -1,5 +1,6 @@ { local container = $.core.v1.container, + local envType = container.envType, local containerPort = $.core.v1.containerPort, distributor_args:: @@ -35,8 +36,6 @@ }, distributor_env_map:: { - GOMAXPROCS: '2', - GOMEMLIMIT: '2GiB', }, distributor_ports:: $.util.defaultPorts, @@ -45,6 +44,12 @@ container.new('distributor', $._images.distributor) + container.withPorts($.distributor_ports) + container.withArgsMixin($.util.mapToFlags($.distributor_args)) + + container.withEnvMixin([ + envType.withName('GOMAXPROCS') + + envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), + envType.withName('GOMEMLIMIT') + + envType.valueFrom.resourceFieldRef.withResource('requests.memory'), + ]) + container.withEnvMap($.distributor_env_map) + $.util.resourcesRequests('2', '2Gi') + $.util.resourcesLimits(null, '4Gi') + diff --git a/cortex/ingester.libsonnet b/cortex/ingester.libsonnet index 7994589..93f8ab5 100644 --- a/cortex/ingester.libsonnet +++ b/cortex/ingester.libsonnet @@ -49,12 +49,19 @@ local name = 'ingester', local container = $.core.v1.container, + local envType = container.envType, ingester_container:: container.new(name, $._images.ingester) + container.withPorts($.ingester_ports) + container.withArgsMixin($.util.mapToFlags($.ingester_args)) + container.withEnvMap($.ingester_env_map) + + container.withEnvMixin([ + envType.withName('GOMAXPROCS') + + 
envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), + envType.withName('GOMEMLIMIT') + + envType.valueFrom.resourceFieldRef.withResource('requests.memory'), + ]) + $.util.resourcesRequests('4', '15Gi') + $.util.resourcesLimits(null, '25Gi') + $.util.readinessProbe + @@ -63,8 +70,6 @@ ingester_deployment_labels:: {}, ingester_env_map:: { - GOMAXPROCS: '4', - GOMEMLIMIT: '15GiB', }, local ingester_pvc = diff --git a/cortex/querier.libsonnet b/cortex/querier.libsonnet index 9cde3f2..d58759c 100644 --- a/cortex/querier.libsonnet +++ b/cortex/querier.libsonnet @@ -1,5 +1,6 @@ { local container = $.core.v1.container, + local envType = container.envType, querier_args:: $._config.grpcConfig + @@ -32,8 +33,6 @@ querier_ports:: $.util.defaultPorts, querier_env_map:: { - GOMAXPROCS: '2', - GOMEMLIMIT: '12GiB', JAEGER_REPORTER_MAX_QUEUE_SIZE: '1024', // Default is 100. }, @@ -44,6 +43,12 @@ $.jaeger_mixin + $.util.readinessProbe + container.withEnvMap($.querier_env_map) + + container.withEnvMixin([ + envType.withName('GOMAXPROCS') + + envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), + envType.withName('GOMEMLIMIT') + + envType.valueFrom.resourceFieldRef.withResource('requests.memory'), + ]) + $.util.resourcesRequests('2', '12Gi') + $.util.resourcesLimits(null, '24Gi'), diff --git a/cortex/query-frontend.libsonnet b/cortex/query-frontend.libsonnet index 39d4f6d..e3b3667 100644 --- a/cortex/query-frontend.libsonnet +++ b/cortex/query-frontend.libsonnet @@ -1,5 +1,6 @@ { local container = $.core.v1.container, + local envType = container.envType, query_frontend_args:: $._config.grpcConfig @@ -43,14 +44,18 @@ container.withPorts($.util.defaultPorts) + container.withArgsMixin($.util.mapToFlags($.query_frontend_args)) + container.withEnvMap($.query_frontend_env_map) + + container.withEnvMixin([ + envType.withName('GOMAXPROCS') + + envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), + envType.withName('GOMEMLIMIT') + + 
envType.valueFrom.resourceFieldRef.withResource('requests.memory'), + ]) + $.jaeger_mixin + $.util.readinessProbe + $.util.resourcesRequests('2', '600Mi') + $.util.resourcesLimits(null, '1200Mi'), query_frontend_env_map:: { - GOMAXPROCS: '2', - GOMEMLIMIT: '600MiB', }, local deployment = $.apps.v1.deployment, diff --git a/cortex/query-scheduler.libsonnet b/cortex/query-scheduler.libsonnet index b0a60a5..fab9295 100644 --- a/cortex/query-scheduler.libsonnet +++ b/cortex/query-scheduler.libsonnet @@ -3,6 +3,7 @@ { local container = $.core.v1.container, local deployment = $.apps.v1.deployment, + local envType = container.envType, local service = $.core.v1.service, query_scheduler_args+:: @@ -18,6 +19,12 @@ container.withPorts($.util.defaultPorts) + container.withArgsMixin($.util.mapToFlags($.query_scheduler_args)) + container.withEnvMap($.query_scheduler_env_map) + + container.withEnvMixin([ + envType.withName('GOMAXPROCS') + + envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), + envType.withName('GOMEMLIMIT') + + envType.valueFrom.resourceFieldRef.withResource('requests.memory'), + ]) + $.jaeger_mixin + $.util.readinessProbe + $.util.resourcesRequests('2', '1Gi') + @@ -32,8 +39,6 @@ deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1), query_scheduler_env_map:: { - GOMAXPROCS: '2', - GOMEMLIMIT: '1GiB', }, query_scheduler_deployment: if !$._config.query_scheduler_enabled then {} else diff --git a/cortex/query-tee.libsonnet b/cortex/query-tee.libsonnet index 0e1250c..6a89e99 100644 --- a/cortex/query-tee.libsonnet +++ b/cortex/query-tee.libsonnet @@ -2,6 +2,7 @@ local container = $.core.v1.container, local containerPort = $.core.v1.containerPort, local deployment = $.apps.v1.deployment, + local envType = container.envType, local service = $.core.v1.service, local servicePort = $.core.v1.servicePort, @@ -19,12 +20,16 @@ ]) + container.withArgsMixin($.util.mapToFlags($.query_tee_args)) + container.withEnvMap($.query_tee_env_map) + + 
container.withEnvMixin([ + envType.withName('GOMAXPROCS') + + envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), + envType.withName('GOMEMLIMIT') + + envType.valueFrom.resourceFieldRef.withResource('requests.memory'), + ]) + $.util.resourcesRequests('1', '512Mi') + $.jaeger_mixin, query_tee_env_map:: { - GOMAXPROCS: '1', - GOMEMLIMIT: '512MiB', }, query_tee_deployment: if !($._config.query_tee_enabled) then {} else diff --git a/cortex/ruler.libsonnet b/cortex/ruler.libsonnet index 1688ca6..3735684 100644 --- a/cortex/ruler.libsonnet +++ b/cortex/ruler.libsonnet @@ -1,5 +1,6 @@ { local container = $.core.v1.container, + local envType = container.envType, ruler_args:: $._config.grpcConfig + @@ -39,7 +40,13 @@ container.withPorts($.util.defaultPorts) + container.withArgsMixin($.util.mapToFlags($.ruler_args)) + container.withEnvMap($.ruler_env_map) + - $.util.resourcesRequests('1', '6Gi') + + container.withEnvMixin([ + envType.withName('GOMAXPROCS') + + envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), + envType.withName('GOMEMLIMIT') + + envType.valueFrom.resourceFieldRef.withResource('requests.memory'), + ]) + + $.util.resourcesRequests('2', '6Gi') + $.util.resourcesLimits('16', '16Gi') + $.util.readinessProbe + $.jaeger_mixin @@ -58,8 +65,6 @@ else {}, ruler_env_map:: { - GOMAXPROCS: '2', - GOMEMLIMIT: '6GiB', }, local service = $.core.v1.service, diff --git a/cortex/store-gateway.libsonnet b/cortex/store-gateway.libsonnet index 757d9b6..7250b25 100644 --- a/cortex/store-gateway.libsonnet +++ b/cortex/store-gateway.libsonnet @@ -1,5 +1,6 @@ { local container = $.core.v1.container, + local envType = container.envType, local podDisruptionBudget = $.policy.v1.podDisruptionBudget, local pvc = $.core.v1.persistentVolumeClaim, local statefulSet = $.apps.v1.statefulSet, @@ -41,15 +42,19 @@ container.withPorts($.store_gateway_ports) + container.withArgsMixin($.util.mapToFlags($.store_gateway_args)) + container.withEnvMap($.store_gateway_env_map) 
+ + container.withEnvMixin([ + envType.withName('GOMAXPROCS') + + envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), + envType.withName('GOMEMLIMIT') + + envType.valueFrom.resourceFieldRef.withResource('requests.memory'), + ]) + container.withVolumeMountsMixin([volumeMount.new('store-gateway-data', '/data')]) + - $.util.resourcesRequests('1', '12Gi') + + $.util.resourcesRequests('2', '12Gi') + $.util.resourcesLimits(null, '18Gi') + $.util.readinessProbe + $.jaeger_mixin, store_gateway_env_map:: { - GOMAXPROCS: '2', - GOMEMLIMIT: '12GiB', }, newStoreGatewayStatefulSet(name, container):: From d9260dbc41c6bbc00e8c82931a20883595b841d7 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Mon, 20 Nov 2023 20:25:49 +0100 Subject: [PATCH 07/15] Update changelog for https://github.com/cortexproject/cortex-jsonnet/commit/524c3b3401fbe6efdb34339772cff63c05eeee03 (#36) Signed-off-by: Friedrich Gonzalez --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67a2b4b..1123828 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ * [CHANGE] Add default instance max inflight pushes for distributors * [CHANGE] Remove mem-ballast from distributor and querier. * [CHANGE] Increase cpu requests for querier to 2. 
-* [CHANGE] Configure default GOMAXPROCS and GOMEMLIMIT for all cortex modules +* [CHANGE] Configure GOMAXPROCS and GOMEMLIMIT for all cortex modules based on requested cpu and memory * [CHANGE] Add default tenant shard sizes * [CHANGE] Use cortex v1.15.3 * [CHANGE] Azure storage endpoint suffix is set to `blob.core.windows.net` for backward compatibility From 006c8fb25bcbd13780e83c1b7275f5d5fe595f5d Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Mon, 20 Nov 2023 20:33:21 +0100 Subject: [PATCH 08/15] Revert alertmanager back to 100m for requests CPU Signed-off-by: Friedrich Gonzalez --- cortex/alertmanager.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cortex/alertmanager.libsonnet b/cortex/alertmanager.libsonnet index 719ac85..f9725ce 100644 --- a/cortex/alertmanager.libsonnet +++ b/cortex/alertmanager.libsonnet @@ -115,7 +115,7 @@ [volumeMount.new('alertmanager-fallback-config', '/configs')] else [] ) + - $.util.resourcesRequests('1', '1Gi') + + $.util.resourcesRequests('100m', '1Gi') + $.util.readinessProbe + $.jaeger_mixin else {}, From 272aaee2086e84f5c5ca26de12ad80a3eaa63932 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Fri, 24 Nov 2023 03:24:21 +0100 Subject: [PATCH 09/15] Reorganize limits better and allow to use limits.cpu and limits.memory too (#37) Signed-off-by: Friedrich Gonzalez --- CHANGELOG.md | 2 +- cortex/alertmanager.libsonnet | 7 +------ cortex/compactor.libsonnet | 7 +------ cortex/config.libsonnet | 27 +++++++++++++++++++++++++++ cortex/distributor.libsonnet | 7 +------ cortex/flusher-job-blocks.libsonnet | 3 +-- cortex/ingester.libsonnet | 7 +------ cortex/overrides-exporter.libsonnet | 1 + cortex/querier.libsonnet | 7 +------ cortex/query-frontend.libsonnet | 7 +------ cortex/query-scheduler.libsonnet | 7 +------ cortex/query-tee.libsonnet | 7 +------ cortex/ruler.libsonnet | 7 +------ cortex/store-gateway.libsonnet | 7 +------ cortex/test-exporter.libsonnet | 3 +-- 15 files changed, 41 
insertions(+), 65 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1123828..f5e7ae1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ * [CHANGE] Add default instance max inflight pushes for distributors * [CHANGE] Remove mem-ballast from distributor and querier. * [CHANGE] Increase cpu requests for querier to 2. -* [CHANGE] Configure GOMAXPROCS and GOMEMLIMIT for all cortex modules based on requested cpu and memory +* [CHANGE] Configure GOMAXPROCS and GOMEMLIMIT for all cortex modules based on cpu and memory requests or limits * [CHANGE] Add default tenant shard sizes * [CHANGE] Use cortex v1.15.3 * [CHANGE] Azure storage endpoint suffix is set to `blob.core.windows.net` for backward compatibility diff --git a/cortex/alertmanager.libsonnet b/cortex/alertmanager.libsonnet index f9725ce..1e870c6 100644 --- a/cortex/alertmanager.libsonnet +++ b/cortex/alertmanager.libsonnet @@ -99,12 +99,6 @@ container.withPorts($.util.defaultPorts + mode.ports) + container.withEnvMap($.alertmanager_env_map) + container.withEnvMixin([container.envType.fromFieldPath('POD_IP', 'status.podIP')]) + - container.withEnvMixin([ - envType.withName('GOMAXPROCS') + - envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), - envType.withName('GOMEMLIMIT') + - envType.valueFrom.resourceFieldRef.withResource('requests.memory'), - ]) + container.withArgsMixin( $.util.mapToFlags($.alertmanager_args) + mode.flags @@ -117,6 +111,7 @@ ) + $.util.resourcesRequests('100m', '1Gi') + $.util.readinessProbe + + $.go_container_mixin + $.jaeger_mixin else {}, diff --git a/cortex/compactor.libsonnet b/cortex/compactor.libsonnet index 2e78c4d..d12d5de 100644 --- a/cortex/compactor.libsonnet +++ b/cortex/compactor.libsonnet @@ -45,17 +45,12 @@ container.withPorts($.compactor_ports) + container.withArgsMixin($.util.mapToFlags($.compactor_args)) + container.withEnvMap($.compactor_env_map) + - container.withEnvMixin([ - envType.withName('GOMAXPROCS') + - 
envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), - envType.withName('GOMEMLIMIT') + - envType.valueFrom.resourceFieldRef.withResource('requests.memory'), - ]) + container.withVolumeMountsMixin([volumeMount.new('compactor-data', '/data')]) + // Do not limit compactor CPU and request enough cores to honor configured max concurrency. $.util.resourcesRequests($._config.cortex_compactor_max_concurrency, '5Gi') + $.util.resourcesLimits(null, '6Gi') + $.util.readinessProbe + + $.go_container_mixin + $.jaeger_mixin, compactor_env_map:: { diff --git a/cortex/config.libsonnet b/cortex/config.libsonnet index bad810f..11c3f07 100644 --- a/cortex/config.libsonnet +++ b/cortex/config.libsonnet @@ -401,8 +401,35 @@ max_series: 4.8e+6, // Max number of series per ingester. 0 = no limit. 4.8 million is closely tied to 15Gb in requests per ingester // max_tenants: 0, // Max number of tenants per ingester. 0 = no limit. }, + + // if we disable this, we need to make sure we set the resource limits + // Disabling this can potentially increase cortex performance, + // but it will also cause performance inconsistencies + gomaxprocs_based_on_cpu_requests: true, + gomemlimit_based_on_mem_requests: true, + + gomaxprocs_resource: + if $._config.gomaxprocs_based_on_cpu_requests then + 'requests.cpu' + else + 'limits.cpu', + + gomemlimit_resource: + if $._config.gomemlimit_based_on_mem_requests then + 'requests.memory' + else + 'limits.memory', }, + go_container_mixin:: + local container = $.core.v1.container; + container.withEnvMixin([ + container.envType.withName('GOMAXPROCS') + + container.envType.valueFrom.resourceFieldRef.withResource($._config.gomaxprocs_resource), + container.envType.withName('GOMEMLIMIT') + + container.envType.valueFrom.resourceFieldRef.withResource($._config.gomemlimit_resource), + ]), + local configMap = $.core.v1.configMap, overrides_config: diff --git a/cortex/distributor.libsonnet b/cortex/distributor.libsonnet index 79c4033..2759156 100644 --- 
a/cortex/distributor.libsonnet +++ b/cortex/distributor.libsonnet @@ -44,16 +44,11 @@ container.new('distributor', $._images.distributor) + container.withPorts($.distributor_ports) + container.withArgsMixin($.util.mapToFlags($.distributor_args)) + - container.withEnvMixin([ - envType.withName('GOMAXPROCS') + - envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), - envType.withName('GOMEMLIMIT') + - envType.valueFrom.resourceFieldRef.withResource('requests.memory'), - ]) + container.withEnvMap($.distributor_env_map) + $.util.resourcesRequests('2', '2Gi') + $.util.resourcesLimits(null, '4Gi') + $.util.readinessProbe + + $.go_container_mixin + $.jaeger_mixin, local deployment = $.apps.v1.deployment, diff --git a/cortex/flusher-job-blocks.libsonnet b/cortex/flusher-job-blocks.libsonnet index 6917a86..56264c1 100644 --- a/cortex/flusher-job-blocks.libsonnet +++ b/cortex/flusher-job-blocks.libsonnet @@ -25,11 +25,10 @@ $.util.resourcesRequests('4', '15Gi') + $.util.resourcesLimits(null, '25Gi') + $.util.readinessProbe + + $.go_container_mixin + $.jaeger_mixin, flusher_env_map:: { - GOMAXPROCS: '4', - GOMEMLIMIT: '15GiB', }, flusher_job_func(jobName, pvcName):: diff --git a/cortex/ingester.libsonnet b/cortex/ingester.libsonnet index 93f8ab5..11e22f5 100644 --- a/cortex/ingester.libsonnet +++ b/cortex/ingester.libsonnet @@ -56,15 +56,10 @@ container.withPorts($.ingester_ports) + container.withArgsMixin($.util.mapToFlags($.ingester_args)) + container.withEnvMap($.ingester_env_map) + - container.withEnvMixin([ - envType.withName('GOMAXPROCS') + - envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), - envType.withName('GOMEMLIMIT') + - envType.valueFrom.resourceFieldRef.withResource('requests.memory'), - ]) + $.util.resourcesRequests('4', '15Gi') + $.util.resourcesLimits(null, '25Gi') + $.util.readinessProbe + + $.go_container_mixin + $.jaeger_mixin, ingester_deployment_labels:: {}, diff --git a/cortex/overrides-exporter.libsonnet 
b/cortex/overrides-exporter.libsonnet index 1f9de4e..8fbf4ac 100644 --- a/cortex/overrides-exporter.libsonnet +++ b/cortex/overrides-exporter.libsonnet @@ -20,6 +20,7 @@ container.withArgsMixin($.util.mapToFlags($.overrides_exporter_args, prefix='--')) + $.util.resourcesRequests('0.5', '0.5Gi') + $.util.readinessProbe + + $.go_container_mixin + container.mixin.readinessProbe.httpGet.withPort($.overrides_exporter_port.name), local deployment = $.apps.v1.deployment, diff --git a/cortex/querier.libsonnet b/cortex/querier.libsonnet index d58759c..15e2245 100644 --- a/cortex/querier.libsonnet +++ b/cortex/querier.libsonnet @@ -43,12 +43,7 @@ $.jaeger_mixin + $.util.readinessProbe + container.withEnvMap($.querier_env_map) + - container.withEnvMixin([ - envType.withName('GOMAXPROCS') + - envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), - envType.withName('GOMEMLIMIT') + - envType.valueFrom.resourceFieldRef.withResource('requests.memory'), - ]) + + $.go_container_mixin + $.util.resourcesRequests('2', '12Gi') + $.util.resourcesLimits(null, '24Gi'), diff --git a/cortex/query-frontend.libsonnet b/cortex/query-frontend.libsonnet index e3b3667..5cbabaf 100644 --- a/cortex/query-frontend.libsonnet +++ b/cortex/query-frontend.libsonnet @@ -44,12 +44,7 @@ container.withPorts($.util.defaultPorts) + container.withArgsMixin($.util.mapToFlags($.query_frontend_args)) + container.withEnvMap($.query_frontend_env_map) + - container.withEnvMixin([ - envType.withName('GOMAXPROCS') + - envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), - envType.withName('GOMEMLIMIT') + - envType.valueFrom.resourceFieldRef.withResource('requests.memory'), - ]) + + $.go_container_mixin + $.jaeger_mixin + $.util.readinessProbe + $.util.resourcesRequests('2', '600Mi') + diff --git a/cortex/query-scheduler.libsonnet b/cortex/query-scheduler.libsonnet index fab9295..8aa5cf7 100644 --- a/cortex/query-scheduler.libsonnet +++ b/cortex/query-scheduler.libsonnet @@ -19,12 +19,7 @@ 
container.withPorts($.util.defaultPorts) + container.withArgsMixin($.util.mapToFlags($.query_scheduler_args)) + container.withEnvMap($.query_scheduler_env_map) + - container.withEnvMixin([ - envType.withName('GOMAXPROCS') + - envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), - envType.withName('GOMEMLIMIT') + - envType.valueFrom.resourceFieldRef.withResource('requests.memory'), - ]) + + $.go_container_mixin + $.jaeger_mixin + $.util.readinessProbe + $.util.resourcesRequests('2', '1Gi') + diff --git a/cortex/query-tee.libsonnet b/cortex/query-tee.libsonnet index 6a89e99..9856c34 100644 --- a/cortex/query-tee.libsonnet +++ b/cortex/query-tee.libsonnet @@ -20,13 +20,8 @@ ]) + container.withArgsMixin($.util.mapToFlags($.query_tee_args)) + container.withEnvMap($.query_tee_env_map) + - container.withEnvMixin([ - envType.withName('GOMAXPROCS') + - envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), - envType.withName('GOMEMLIMIT') + - envType.valueFrom.resourceFieldRef.withResource('requests.memory'), - ]) + $.util.resourcesRequests('1', '512Mi') + + $.go_container_mixin + $.jaeger_mixin, query_tee_env_map:: { diff --git a/cortex/ruler.libsonnet b/cortex/ruler.libsonnet index 3735684..c60a874 100644 --- a/cortex/ruler.libsonnet +++ b/cortex/ruler.libsonnet @@ -40,12 +40,7 @@ container.withPorts($.util.defaultPorts) + container.withArgsMixin($.util.mapToFlags($.ruler_args)) + container.withEnvMap($.ruler_env_map) + - container.withEnvMixin([ - envType.withName('GOMAXPROCS') + - envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), - envType.withName('GOMEMLIMIT') + - envType.valueFrom.resourceFieldRef.withResource('requests.memory'), - ]) + + $.go_container_mixin + $.util.resourcesRequests('2', '6Gi') + $.util.resourcesLimits('16', '16Gi') + $.util.readinessProbe + diff --git a/cortex/store-gateway.libsonnet b/cortex/store-gateway.libsonnet index 7250b25..c21ee30 100644 --- a/cortex/store-gateway.libsonnet +++ 
b/cortex/store-gateway.libsonnet @@ -42,12 +42,7 @@ container.withPorts($.store_gateway_ports) + container.withArgsMixin($.util.mapToFlags($.store_gateway_args)) + container.withEnvMap($.store_gateway_env_map) + - container.withEnvMixin([ - envType.withName('GOMAXPROCS') + - envType.valueFrom.resourceFieldRef.withResource('requests.cpu'), - envType.withName('GOMEMLIMIT') + - envType.valueFrom.resourceFieldRef.withResource('requests.memory'), - ]) + + $.go_container_mixin + container.withVolumeMountsMixin([volumeMount.new('store-gateway-data', '/data')]) + $.util.resourcesRequests('2', '12Gi') + $.util.resourcesLimits(null, '18Gi') + diff --git a/cortex/test-exporter.libsonnet b/cortex/test-exporter.libsonnet index 036d6fe..e7d088e 100644 --- a/cortex/test-exporter.libsonnet +++ b/cortex/test-exporter.libsonnet @@ -21,11 +21,10 @@ container.withEnvMap($.test_exporter_env_map) + $.util.resourcesRequests('100m', '100Mi') + $.util.resourcesLimits('100m', '100Mi') + + $.go_container_mixin + $.jaeger_mixin, test_exporter_env_map:: { - GOMAXPROCS: '1', - GOMEMLIMIT: '100MiB', }, local deployment = $.apps.v1.deployment, From c9f3e2060eb2b3f4c4d47fc811b02d7b23e41757 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Fri, 24 Nov 2023 03:32:50 +0100 Subject: [PATCH 10/15] Release v1.15.3 (#38) Signed-off-by: Friedrich Gonzalez --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5e7ae1..70c494d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # Changelog ## master / unreleased + +## 1.15.3 / 2023-11-24 * [CHANGE] Add default instance max series for ingesters * [CHANGE] Add default instance max inflight pushes for distributors * [CHANGE] Remove mem-ballast from distributor and querier. 
From cbb7997c018bb124553ce965c4e0993c1ce86827 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Fri, 24 Nov 2023 04:12:51 +0100 Subject: [PATCH 11/15] Upgrade build image (#39) * Upgrade build-image alpine 3.18 tanka 0.26.0 go 1.21 Signed-off-by: Friedrich Gonzalez * Update ci.yaml Signed-off-by: Friedrich Gonzalez --------- Signed-off-by: Friedrich Gonzalez --- .github/workflows/ci.yaml | 6 +++--- README.md | 4 ++-- build-image/Dockerfile | 14 +++++++------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e0eac22..fa49025 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -9,7 +9,7 @@ on: jobs: lint: runs-on: ubuntu-latest - container: quay.io/cortexproject/cortex-jsonnet-build-image:e63d87f + container: quay.io/cortexproject/cortex-jsonnet-build-image:e158eda steps: - uses: actions/checkout@v2 name: Checkout @@ -23,7 +23,7 @@ jobs: run: make lint-playbooks build: runs-on: ubuntu-latest - container: quay.io/cortexproject/cortex-jsonnet-build-image:e63d87f + container: quay.io/cortexproject/cortex-jsonnet-build-image:e158eda steps: - uses: actions/checkout@v2 name: Checkout @@ -34,7 +34,7 @@ jobs: run: make build-mixin readme: runs-on: ubuntu-latest - container: quay.io/cortexproject/cortex-jsonnet-build-image:e63d87f + container: quay.io/cortexproject/cortex-jsonnet-build-image:e158eda steps: - uses: actions/checkout@v2 name: Checkout diff --git a/README.md b/README.md index b0a5133..539559a 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ To generate the YAMLs for deploying Cortex: ```console $ # make sure to be outside of GOPATH or a go.mod project - $ GO111MODULE=on go install github.com/grafana/tanka/cmd/tk@v0.24.0 + $ GO111MODULE=on go install github.com/grafana/tanka/cmd/tk@v0.26.0 $ GO111MODULE=on go install github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@v0.5.1 ``` @@ -25,7 +25,7 @@ To generate the YAMLs for deploying Cortex: ```console $ 
mkdir && cd - $ tk init --k8s=1.24 # this includes github.com/jsonnet-libs/k8s-libsonnet/1.24@main + $ tk init --k8s=1.26 # this includes github.com/jsonnet-libs/k8s-libsonnet/1.26@main $ jb install github.com/cortexproject/cortex-jsonnet/cortex@main ``` diff --git a/build-image/Dockerfile b/build-image/Dockerfile index a874e69..70d2fd9 100644 --- a/build-image/Dockerfile +++ b/build-image/Dockerfile @@ -1,5 +1,5 @@ # Build jsonnet -FROM alpine:3.17 AS jsonnet-builder +FROM alpine:3.18 AS jsonnet-builder RUN apk add --no-cache git make g++ RUN git clone https://github.com/google/jsonnet && \ git -C jsonnet checkout v0.20.0 && \ @@ -8,7 +8,7 @@ RUN git clone https://github.com/google/jsonnet && \ cp jsonnet/jsonnetfmt /usr/bin # Build jb -FROM alpine:3.17 AS jb-builder +FROM alpine:3.18 AS jb-builder ARG JSONNET_BUNDLER_VERSION=0.5.1 ARG JSONNET_BUNDLER_CHECKSUM="f5bccc94d28fbbe8ad1d46fd4f208619e45d368a5d7924f6335f4ecfa0605c85 /usr/bin/jb" RUN apk add --no-cache curl @@ -17,19 +17,19 @@ RUN echo "${JSONNET_BUNDLER_CHECKSUM}" | sha256sum -c || (printf "wanted: %s\n RUN chmod +x /usr/bin/jb # Build tanka -FROM alpine:3.17 AS tk-builder -ARG TANKA_VERSION=0.24.0 -ARG TANKA_CHECKSUM="82c8c533c29eefea0af9c28f487203b19dec84ce2624702f99196e777f946ddc /usr/bin/tk" +FROM alpine:3.18 AS tk-builder +ARG TANKA_VERSION=0.26.0 +ARG TANKA_CHECKSUM="089796ae2ce65390501b2c68ceca1ce99ff12787d5ae3b4823c825a07e6e22f4 /usr/bin/tk" RUN apk add --no-cache curl RUN curl -fSL -o "/usr/bin/tk" "https://github.com/grafana/tanka/releases/download/v${TANKA_VERSION}/tk-linux-amd64" RUN echo "${TANKA_CHECKSUM}" | sha256sum -c || (printf "wanted: %s\n got: %s\n" "${TANKA_CHECKSUM}" "$(sha256sum /usr/bin/tk)"; exit 1) RUN chmod +x /usr/bin/tk # Build mixtool -FROM golang:1.20-alpine AS mixtool-builder +FROM golang:1.21-alpine AS mixtool-builder RUN GO111MODULE=on go install github.com/monitoring-mixins/mixtool/cmd/mixtool@ae18e31161ea10545b9c1ac0d23c10122f2c12b5 -FROM alpine:3.17 +FROM alpine:3.18 
RUN apk add --no-cache git make libgcc libstdc++ zip findutils sed yq COPY --from=jsonnet-builder /usr/bin/jsonnetfmt /usr/bin COPY --from=jsonnet-builder /usr/bin/jsonnet /usr/bin From 967325b522ad51c703648f33563b8ea31547ba46 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Fri, 24 Nov 2023 04:20:44 +0100 Subject: [PATCH 12/15] Use cortex v1.16.0 (#40) Signed-off-by: Friedrich Gonzalez --- CHANGELOG.md | 1 + cortex/images.libsonnet | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70c494d..f3fd047 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## master / unreleased +* [CHANGE] Use cortex v1.16.0 ## 1.15.3 / 2023-11-24 * [CHANGE] Add default instance max series for ingesters diff --git a/cortex/images.libsonnet b/cortex/images.libsonnet index 05f89c0..fed6dc7 100644 --- a/cortex/images.libsonnet +++ b/cortex/images.libsonnet @@ -5,7 +5,7 @@ memcachedExporter: 'prom/memcached-exporter:v0.6.0', // Our services. 
- cortex: 'cortexproject/cortex:v1.15.3', + cortex: 'cortexproject/cortex:v1.16.0', alertmanager: self.cortex, distributor: self.cortex, @@ -20,7 +20,7 @@ query_scheduler: self.cortex, overrides_exporter: self.cortex, - query_tee: 'quay.io/cortexproject/query-tee:v1.15.3', - testExporter: 'cortexproject/test-exporter:v1.15.3', + query_tee: 'quay.io/cortexproject/query-tee:v1.16.0', + testExporter: 'cortexproject/test-exporter:v1.16.0', }, } From 8a792a819c53b74cd36da4ee44504f326c8b7cd1 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Wed, 20 Dec 2023 15:31:11 +0100 Subject: [PATCH 13/15] Enable query stats on frontend by default (#41) Signed-off-by: Friedrich Gonzalez --- CHANGELOG.md | 1 + cortex/query-frontend.libsonnet | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3fd047..2387656 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## master / unreleased * [CHANGE] Use cortex v1.16.0 +* [ENHANCEMENT] Enable frontend query stats by default ## 1.15.3 / 2023-11-24 * [CHANGE] Add default instance max series for ingesters diff --git a/cortex/query-frontend.libsonnet b/cortex/query-frontend.libsonnet index 5cbabaf..a055221 100644 --- a/cortex/query-frontend.libsonnet +++ b/cortex/query-frontend.libsonnet @@ -7,9 +7,12 @@ { target: 'query-frontend', - // Need log.level=debug so all queries are logged, needed for analyse.py. + // Need log.level=debug to see trace id for queries 'log.level': 'debug', + // a message with some statistics is logged for every query. + 'frontend.query-stats-enabled': true, + // Increase HTTP server response write timeout, as we were seeing some // queries that return a lot of data timeing out. 
'server.http-write-timeout': '1m', From 810c37b0396f23072c32a26d878452c53be758a0 Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Wed, 20 Dec 2023 16:07:07 +0100 Subject: [PATCH 14/15] Enable ruler query stats by default (#42) Signed-off-by: Friedrich Gonzalez --- CHANGELOG.md | 1 + cortex/ruler.libsonnet | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2387656..65a43a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## master / unreleased * [CHANGE] Use cortex v1.16.0 * [ENHANCEMENT] Enable frontend query stats by default +* [ENHANCEMENT] Enable ruler query stats by default ## 1.15.3 / 2023-11-24 * [CHANGE] Add default instance max series for ingesters diff --git a/cortex/ruler.libsonnet b/cortex/ruler.libsonnet index c60a874..b3f4702 100644 --- a/cortex/ruler.libsonnet +++ b/cortex/ruler.libsonnet @@ -32,6 +32,9 @@ // Do not extend the replication set on unhealthy (or LEAVING) ingester when "unregister on shutdown" // is set to false. 'distributor.extend-writes': $._config.unregister_ingesters_on_shutdown, + + // a message with some statistics is logged for every query. 
+ 'ruler.query-stats-enabled': true, }, ruler_container:: From 2e0f64ddc17be61a5c4779500470a27df24fc4ee Mon Sep 17 00:00:00 2001 From: Friedrich Gonzalez Date: Tue, 20 Feb 2024 10:40:01 +0100 Subject: [PATCH 15/15] Upgrade memcached to 1.6.23 and memcached-exporter to v0.14.2 (#43) Signed-off-by: Friedrich Gonzalez --- CHANGELOG.md | 1 + cortex/images.libsonnet | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65a43a0..db11f82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## master / unreleased +* [CHANGE] Upgrade memcached to 1.6.23-alpine and memcached-exporter to v0.14.2 * [CHANGE] Use cortex v1.16.0 * [ENHANCEMENT] Enable frontend query stats by default * [ENHANCEMENT] Enable ruler query stats by default diff --git a/cortex/images.libsonnet b/cortex/images.libsonnet index fed6dc7..b87f965 100644 --- a/cortex/images.libsonnet +++ b/cortex/images.libsonnet @@ -1,8 +1,8 @@ { _images+:: { // Various third-party images. - memcached: 'memcached:1.6.9-alpine', - memcachedExporter: 'prom/memcached-exporter:v0.6.0', + memcached: 'memcached:1.6.23-alpine', + memcachedExporter: 'prom/memcached-exporter:v0.14.2', // Our services. cortex: 'cortexproject/cortex:v1.16.0',