Skip to content

Commit

Permalink
Merge branch 'main' into krajo/wip-rw2
Browse files Browse the repository at this point in the history
  • Loading branch information
krajorama committed Jan 17, 2025
2 parents a864aa3 + 504dd37 commit a7b348d
Show file tree
Hide file tree
Showing 38 changed files with 1,607 additions and 95 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@

### Grafana Mimir

* [CHANGE] Querier: pass context to queryable `IsApplicable` hook. #10451
* [CHANGE] Distributor: OTLP and push handler replace all non-UTF8 characters with the unicode replacement character `\uFFFD` in error messages before propagating them. #10236
* [CHANGE] Querier: pass query matchers to queryable `IsApplicable` hook. #10256
* [CHANGE] Query-frontend: Add `topic` label to `cortex_ingest_storage_strong_consistency_requests_total`, `cortex_ingest_storage_strong_consistency_failures_total`, and `cortex_ingest_storage_strong_consistency_wait_duration_seconds` metrics. #10220
* [CHANGE] Ruler: cap the rate of retries for remote query evaluation to 170/sec. This is configurable via `-ruler.query-frontend.max-retries-rate`. #10375 #10403
* [CHANGE] Query-frontend: Add `topic` label to `cortex_ingest_storage_reader_last_produced_offset_requests_total`, `cortex_ingest_storage_reader_last_produced_offset_failures_total`, `cortex_ingest_storage_reader_last_produced_offset_request_duration_seconds`, `cortex_ingest_storage_reader_partition_start_offset_requests_total`, `cortex_ingest_storage_reader_partition_start_offset_failures_total`, `cortex_ingest_storage_reader_partition_start_offset_request_duration_seconds` metrics. #10462
* [ENHANCEMENT] Query Frontend: Return server-side `samples_processed` statistics. #10103
* [ENHANCEMENT] Distributor: OTLP receiver now also converts metric metadata. See also https://github.com/prometheus/prometheus/pull/15416. #10168
* [ENHANCEMENT] Distributor: discard float and histogram samples with duplicated timestamps from each timeseries in a request before the request is forwarded to ingesters. Discarded samples are tracked by `cortex_discarded_samples_total` metrics with the reason `sample_duplicate_timestamp`. #10145 #10430
Expand All @@ -19,6 +21,7 @@
* [ENHANCEMENT] Ruler: When rule concurrency is enabled for a rule group, its rules will now be reordered and run in batches based on their dependencies. This increases the number of rules that can potentially run concurrently. Note that the global and tenant-specific limits still apply. #10400
* [ENHANCEMENT] Query-frontend: include more information about read consistency in trace spans produced when using experimental ingest storage. #10412
* [ENHANCEMENT] Ingester: Hide tokens in ingester ring status page when ingest storage is enabled #10399
* [ENHANCEMENT] Ingester: add `active_series_additional_custom_trackers` configuration, in addition to the already existing `active_series_custom_trackers`. The `active_series_additional_custom_trackers` configuration allows you to configure additional custom trackers that get merged with `active_series_custom_trackers` at runtime. #10428
* [BUGFIX] Distributor: Use a boolean to track changes while merging the ReplicaDesc components, rather than comparing the objects directly. #10185
* [BUGFIX] Querier: fix timeout responding to query-frontend when response size is very close to `-querier.frontend-client.grpc-max-send-msg-size`. #10154
* [BUGFIX] Query-frontend and querier: show warning/info annotations in some cases where they were missing (if a lazy querier was used). #10277
Expand All @@ -31,6 +34,7 @@
* [BUGFIX] PromQL: Fix <aggr_over_time> functions with histograms https://github.com/prometheus/prometheus/pull/15711 #10400
* [BUGFIX] MQE: Fix <aggr_over_time> functions with histograms #10400
* [BUGFIX] Distributor: return HTTP status 415 Unsupported Media Type instead of 200 Success for Remote Write 2.0 until we support it. #10423
* [BUGFIX] Query-frontend: Add flag `-query-frontend.prom2-range-compat` and corresponding YAML to rewrite queries with ranges that worked in Prometheus 2 but are invalid in Prometheus 3. #10445 #10461

### Mixin

Expand Down
23 changes: 22 additions & 1 deletion cmd/mimir/config-descriptor.json
Original file line number Diff line number Diff line change
Expand Up @@ -4020,13 +4020,23 @@
"kind": "field",
"name": "active_series_custom_trackers",
"required": false,
"desc": "Additional custom trackers for active metrics. If there are active series matching a provided matcher (map value), the count will be exposed in the custom trackers metric labeled using the tracker name (map key). Zero valued counts are not exposed (and removed when they go back to zero).",
"desc": "Custom trackers for active metrics. If there are active series matching a provided matcher (map value), the count is exposed in the custom trackers metric labeled using the tracker name (map key). Zero-valued counts are not exposed and are removed when they go back to zero.",
"fieldValue": null,
"fieldDefaultValue": {},
"fieldFlag": "ingester.active-series-custom-trackers",
"fieldType": "map of tracker name (string) to matcher (string)",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "active_series_additional_custom_trackers",
"required": false,
"desc": "Additional custom trackers for active metrics merged on top of the base custom trackers. You can use this configuration option to define the base custom trackers globally for all tenants, and then use the additional trackers to add extra trackers on a per-tenant basis.",
"fieldValue": null,
"fieldDefaultValue": {},
"fieldType": "map of tracker name (string) to matcher (string)",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "out_of_order_time_window",
Expand Down Expand Up @@ -4338,6 +4348,17 @@
"fieldType": "string",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "prom2_range_compat",
"required": false,
"desc": "Rewrite queries using the same range selector and resolution [X:X] which don't work in Prometheus 3.0 to a nearly identical form that works with Prometheus 3.0 semantics",
"fieldValue": null,
"fieldDefaultValue": false,
"fieldFlag": "query-frontend.prom2-range-compat",
"fieldType": "boolean",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "cardinality_analysis_enabled",
Expand Down
2 changes: 2 additions & 0 deletions cmd/mimir/help-all.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -2289,6 +2289,8 @@ Usage of ./cmd/mimir/mimir:
Maximum time to wait for the query-frontend to become ready before rejecting requests received before the frontend was ready. 0 to disable (i.e. fail immediately if a request is received while the frontend is still starting up) (default 2s)
-query-frontend.parallelize-shardable-queries
True to enable query sharding.
-query-frontend.prom2-range-compat
[experimental] Rewrite queries using the same range selector and resolution [X:X] which don't work in Prometheus 3.0 to a nearly identical form that works with Prometheus 3.0 semantics
-query-frontend.prune-queries
[experimental] True to enable pruning dead code (eg. expressions that cannot produce any results) and simplifying expressions (eg. expressions that can be evaluated immediately) in queries.
-query-frontend.querier-forget-delay duration
Expand Down
5 changes: 3 additions & 2 deletions development/mimir-ingest-storage/config/mimir.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ ingest_storage:
address: kafka_1:9092
topic: mimir-ingest
last_produced_offset_poll_interval: 500ms
startup_fetch_concurrency: 15
ongoing_fetch_concurrency: 2
fetch_concurrency_max: 15

ingester:
track_ingester_owned_series: true
active_series_metrics_update_period: 10s
active_series_metrics_idle_timeout: 1m

partition_ring:
min_partition_owners_count: 1
Expand Down
7 changes: 7 additions & 0 deletions development/mimir-ingest-storage/config/runtime.yaml
Original file line number Diff line number Diff line change
@@ -1 +1,8 @@
# This file can be used to set overrides or other runtime config.
overrides:
anonymous:
active_series_custom_trackers:
base_mimir_write: '{job="mimir-read-write-mode/mimir-write"}'
base_mimir_read: '{job="mimir-read-write-mode/mimir-read"}'
active_series_additional_custom_trackers:
additional_mimir_backend: '{job="mimir-read-write-mode/mimir-backend"}'
4 changes: 2 additions & 2 deletions docs/sources/helm-charts/mimir-distributed/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ keywords:
- Grafana Enterprise Metrics
- Grafana metrics
cascade:
MIMIR_VERSION: "v2.14.x"
GEM_VERSION: "v2.14.x"
MIMIR_VERSION: "v2.15.x"
GEM_VERSION: "v2.15.x"
ALLOY_VERSION: "latest"
---

Expand Down
31 changes: 25 additions & 6 deletions docs/sources/mimir/configure/configuration-parameters/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -3359,20 +3359,33 @@ The `limits` block configures default and per-tenant limits imposed by component
# CLI flag: -ingester.ooo-native-histograms-ingestion-enabled
[ooo_native_histograms_ingestion_enabled: <boolean> | default = false]

# (advanced) Additional custom trackers for active metrics. If there are active
# series matching a provided matcher (map value), the count will be exposed in
# the custom trackers metric labeled using the tracker name (map key). Zero
# valued counts are not exposed (and removed when they go back to zero).
# (advanced) Custom trackers for active metrics. If there are active series
# matching a provided matcher (map value), the count is exposed in the custom
# trackers metric labeled using the tracker name (map key). Zero-valued counts
# are not exposed and are removed when they go back to zero.
# Example:
# The following configuration will count the active series coming from dev and
# prod namespaces for each tenant and label them as {name="dev"} and
# The following configuration counts the active series coming from dev and
# prod namespaces for each tenant and labels them as {name="dev"} and
# {name="prod"} in the cortex_ingester_active_series_custom_tracker metric.
# active_series_custom_trackers:
# dev: '{namespace=~"dev-.*"}'
# prod: '{namespace=~"prod-.*"}'
# CLI flag: -ingester.active-series-custom-trackers
[active_series_custom_trackers: <map of tracker name (string) to matcher (string)> | default = ]

# (advanced) Additional custom trackers for active metrics merged on top of the
# base custom trackers. You can use this configuration option to define the base
# custom trackers globally for all tenants, and then use the additional trackers
# to add extra trackers on a per-tenant basis.
# Example:
# The following configuration counts the active series coming from dev and
# prod namespaces for each tenant and labels them as {name="dev"} and
# {name="prod"} in the cortex_ingester_active_series_custom_tracker metric.
# active_series_additional_custom_trackers:
# dev: '{namespace=~"dev-.*"}'
# prod: '{namespace=~"prod-.*"}'
[active_series_additional_custom_trackers: <map of tracker name (string) to matcher (string)> | default = ]

# (experimental) Non-zero value enables out-of-order support for most recent
# samples that are within the time window in relation to the TSDB's maximum
# time, i.e., within [db.maxTime-timeWindow, db.maxTime]). The ingester will
Expand Down Expand Up @@ -3559,6 +3572,12 @@ The `limits` block configures default and per-tenant limits imposed by component
# CLI flag: -query-frontend.enabled-promql-experimental-functions
[enabled_promql_experimental_functions: <string> | default = ""]

# (experimental) Rewrite queries using the same range selector and resolution
# [X:X] which don't work in Prometheus 3.0 to a nearly identical form that works
# with Prometheus 3.0 semantics
# CLI flag: -query-frontend.prom2-range-compat
[prom2_range_compat: <boolean> | default = false]

# Enables endpoints used for cardinality analysis.
# CLI flag: -querier.cardinality-analysis-enabled
[cardinality_analysis_enabled: <boolean> | default = false]
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ require (
sigs.k8s.io/yaml v1.4.0 // indirect
)

replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20250110020350-a1e2bcf4a615
replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20250116135451-914982745659

// Replace memberlist with our fork which includes some fixes that haven't been
// merged upstream yet:
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1283,8 +1283,8 @@ github.com/grafana/gomemcache v0.0.0-20241016125027-0a5bcc5aef40 h1:1TeKhyS+pvzO
github.com/grafana/gomemcache v0.0.0-20241016125027-0a5bcc5aef40/go.mod h1:IGRj8oOoxwJbHBYl1+OhS9UjQR0dv6SQOep7HqmtyFU=
github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe h1:yIXAAbLswn7VNWBIvM71O2QsgfgW9fRXZNR0DXe6pDU=
github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/grafana/mimir-prometheus v0.0.0-20250110020350-a1e2bcf4a615 h1:lr3wUcXU0mScCDn/4NXc0CYglZJfy5l35sOJFar9qE0=
github.com/grafana/mimir-prometheus v0.0.0-20250110020350-a1e2bcf4a615/go.mod h1:KfyZCeyGxf5gvl6VZbrQsd400nJjGw+ygMEtDVZKIT4=
github.com/grafana/mimir-prometheus v0.0.0-20250116135451-914982745659 h1:OfkJoA8D1dg3zMW3kDMkDdbcMBlNqDfCFSZgPcMToOQ=
github.com/grafana/mimir-prometheus v0.0.0-20250116135451-914982745659/go.mod h1:KfyZCeyGxf5gvl6VZbrQsd400nJjGw+ygMEtDVZKIT4=
github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956 h1:em1oddjXL8c1tL0iFdtVtPloq2hRPen2MJQKoAWpxu0=
github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956/go.mod h1:qtI1ogk+2JhVPIXVc6q+NHziSmy2W5GbdQZFUHADCBU=
github.com/grafana/prometheus-alertmanager v0.25.1-0.20240930132144-b5e64e81e8d3 h1:6D2gGAwyQBElSrp3E+9lSr7k8gLuP3Aiy20rweLWeBw=
Expand Down
3 changes: 2 additions & 1 deletion operations/helm/charts/mimir-distributed/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ Entries should include a reference to the Pull Request that introduced the chang
* [ENHANCEMENT] Individual mimir components can override their container images via the *.image values. The component's image definitions always override the values set in global `image` or `enterprise.image`. #10340
* [BUGFIX] Fix calculation of `mimir.siToBytes` and use floating-point arithmetic. #10044

## 5.6.0-rc.0
## 5.6.0

* [ENHANCEMENT] Upgrade Mimir and GEM to [2.15.0-rc.0](https://github.com/grafana/mimir/blob/main/CHANGELOG.md#2150). #10369
* [CHANGE] Update rollout-operator version to 0.20.0. #9995
* [CHANGE] Remove the `track_sizes` feature for Memcached pods since it is unused. #10032
* [FEATURE] Add support for GEM's federation-frontend. See the `federation_frontend` section in the values file. #9673
Expand Down
4 changes: 2 additions & 2 deletions operations/helm/charts/mimir-distributed/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
apiVersion: v2
version: 5.6.0-weekly.325
appVersion: r325
version: 5.6.0
appVersion: 2.15.0
description: "Grafana Mimir"
home: https://grafana.com/docs/helm-charts/mimir-distributed/latest/
icon: https://grafana.com/static/img/logos/logo-mimir.svg
Expand Down
4 changes: 2 additions & 2 deletions operations/helm/charts/mimir-distributed/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Helm chart for deploying [Grafana Mimir](https://grafana.com/docs/mimir/latest/)

For the full documentation, visit [Grafana mimir-distributed Helm chart documentation](https://grafana.com/docs/helm-charts/mimir-distributed/latest/).

> **Note:** The documentation version is derived from the Helm chart version which is 5.6.0-weekly.325.
> **Note:** The documentation version is derived from the Helm chart version which is 5.6.0.
When upgrading from Helm chart version 4.X, please see [Migrate the Helm chart from version 4.x to 5.0](https://grafana.com/docs/helm-charts/mimir-distributed/latest/migration-guides/migrate-helm-chart-4.x-to-5.0/).
When upgrading from Helm chart version 3.x, please see [Migrate from single zone to zone-aware replication with Helm](https://grafana.com/docs/helm-charts/mimir-distributed/latest/migration-guides/migrate-from-single-zone-with-helm/).
Expand All @@ -14,7 +14,7 @@ When upgrading from Helm chart version 2.1, please see [Upgrade the Grafana Mimi

# mimir-distributed

![Version: 5.6.0-weekly.325](https://img.shields.io/badge/Version-5.6.0--weekly.325-informational?style=flat-square) ![AppVersion: r325](https://img.shields.io/badge/AppVersion-r325-informational?style=flat-square)
![Version: 5.6.0](https://img.shields.io/badge/Version-5.6.0-informational?style=flat-square) ![AppVersion: 2.15.0](https://img.shields.io/badge/AppVersion-2.15.0-informational?style=flat-square)

Grafana Mimir

Expand Down
4 changes: 2 additions & 2 deletions operations/helm/charts/mimir-distributed/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ image:
# -- Grafana Mimir container image repository. Note: for Grafana Enterprise Metrics use the value 'enterprise.image.repository'
repository: grafana/mimir
# -- Grafana Mimir container image tag. Note: for Grafana Enterprise Metrics use the value 'enterprise.image.tag'
tag: r325-ed3160e
tag: 2.15.0
# -- Container pull policy - shared between Grafana Mimir and Grafana Enterprise Metrics
pullPolicy: IfNotPresent
# -- Optionally specify an array of imagePullSecrets - shared between Grafana Mimir and Grafana Enterprise Metrics
Expand Down Expand Up @@ -4037,7 +4037,7 @@ enterprise:
# -- Grafana Enterprise Metrics container image repository. Note: for Grafana Mimir use the value 'image.repository'
repository: grafana/enterprise-metrics
# -- Grafana Enterprise Metrics container image tag. Note: for Grafana Mimir use the value 'image.tag'
tag: r325-624b3501
tag: v2.15.0
# Note: pullPolicy and optional pullSecrets are set in toplevel 'image' section, not here

# In order to use Grafana Enterprise Metrics features, you will need to provide the contents of your Grafana Enterprise Metrics
Expand Down
44 changes: 44 additions & 0 deletions pkg/frontend/querymiddleware/astmapper/prom2_range_compat.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// SPDX-License-Identifier: AGPL-3.0-only

package astmapper

import (
"context"
"time"

"github.com/prometheus/prometheus/promql/parser"
)

// NewProm2RangeCompat returns an ASTMapper that rewrites subqueries whose range
// and step are identical. Such subqueries used to return results in Prometheus 2,
// where range selectors were left-closed and right-closed. The rewrite makes them
// compatible with Prometheus 3, where range selectors are left-open and
// right-closed. The given context is checked for cancellation while mapping.
func NewProm2RangeCompat(ctx context.Context) ASTMapper {
	return NewASTExprMapper(&prom2RangeCompat{ctx: ctx})
}
// prom2RangeCompat is the expression mapper wrapped by NewProm2RangeCompat.
type prom2RangeCompat struct {
// ctx is checked at the start of every MapExpr call; once it is done,
// MapExpr returns its error instead of mapping the expression.
ctx context.Context
}
// MapExpr widens the range of any subquery whose range equals its step by one
// millisecond and returns every other expression unchanged. It always reports
// finished as false. If the mapper's context is already done, it returns the
// context's error together with a nil expression.
func (c prom2RangeCompat) MapExpr(expr parser.Expr) (mapped parser.Expr, finished bool, err error) {
	if ctxErr := c.ctx.Err(); ctxErr != nil {
		return nil, false, ctxErr
	}

	// Range selectors are left-open and right-closed in Prometheus 3, so a subquery
	// with identical range and step selects only a single data point. That breaks
	// functions that need multiple points (rate, increase). Nudging the range up
	// slightly restores the multi-point selection that Prometheus 2 produced.
	if subquery, isSubquery := expr.(*parser.SubqueryExpr); isSubquery && subquery.Range == subquery.Step {
		subquery.Range += time.Millisecond
	}

	// The subquery node is adjusted in place, so the incoming expression is returned.
	return expr, false, nil
}
64 changes: 64 additions & 0 deletions pkg/frontend/querymiddleware/astmapper/prom2_range_compat_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// SPDX-License-Identifier: AGPL-3.0-only

package astmapper

import (
"context"
"testing"

"github.com/prometheus/prometheus/promql/parser"
"github.com/stretchr/testify/require"
)

// TestProm2RangeCompat_Cancellation verifies that mapping with an already
// canceled context fails with context.Canceled.
func TestProm2RangeCompat_Cancellation(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	// Fail fast on a parse error so the assertion below only exercises the mapper.
	query, err := parser.ParseExpr(`up{foo="bar"}`)
	require.NoError(t, err)

	mapper := NewProm2RangeCompat(ctx)
	_, err = mapper.Map(query)

	require.ErrorIs(t, err, context.Canceled)
}

// TestProm2RangeCompat_Queries verifies that only subqueries whose range equals
// their step are rewritten, and that every other query is returned unchanged.
func TestProm2RangeCompat_Queries(t *testing.T) {
	type testCase struct {
		query         string
		expectedQuery string
	}

	testCases := []testCase{
		{
			// Plain range selector: not a subquery, left untouched.
			query:         `sum(rate(some_series{job="foo"}[1m]))`,
			expectedQuery: `sum(rate(some_series{job="foo"}[1m]))`,
		},
		{
			query:         `sum(rate(some_series{job="foo"}[1m:1m]))`,
			expectedQuery: `sum(rate(some_series{job="foo"}[1m1ms:1m]))`,
		},
		{
			query:         `sum(rate(some_series{job="foo"}[1h]))`,
			expectedQuery: `sum(rate(some_series{job="foo"}[1h]))`,
		},
		{
			query:         `sum(rate(some_series{job="foo"}[1h:1h]))`,
			expectedQuery: `sum(rate(some_series{job="foo"}[1h1ms:1h]))`,
		},
		{
			// Subquery whose range differs from its step: must be left untouched.
			query:         `sum(rate(some_series{job="foo"}[5m:1m]))`,
			expectedQuery: `sum(rate(some_series{job="foo"}[5m:1m]))`,
		},
		{
			// Every matching subquery in a binary expression is rewritten.
			query:         `sum(rate(some_series{job="foo"}[1h:1h])) / sum(rate(other_series{job="foo"}[1m:1m]))`,
			expectedQuery: `sum(rate(some_series{job="foo"}[1h1ms:1h])) / sum(rate(other_series{job="foo"}[1m1ms:1m]))`,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.query, func(t *testing.T) {
			query, err := parser.ParseExpr(tc.query)
			require.NoError(t, err)

			mapper := NewProm2RangeCompat(context.Background())
			mapped, err := mapper.Map(query)
			require.NoError(t, err)
			require.Equal(t, tc.expectedQuery, mapped.String())
		})
	}
}
Loading

0 comments on commit a7b348d

Please sign in to comment.