Skip to content

Commit

Permalink
fix(analyse_prometheus): store errors instead of exit
Browse files Browse the repository at this point in the history
Fixes: grafana/cortex-tools#236

Signed-off-by: Furkan <[email protected]>
  • Loading branch information
Dentrax committed Sep 27, 2022
1 parent 44bd7bc commit 2ef7544
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@

### Mimirtool

* [ENHANCEMENT] mimirtool analyze: Store query errors in the output instead of exiting during the analysis. #3052
* [BUGFIX] mimirtool remote-read: fix return paths that [returned a nil error even when an error had occurred](https://github.com/grafana/cortex-tools/issues/260). #3053

### Query-tee
Expand Down
2 changes: 2 additions & 0 deletions pkg/mimirtool/analyze/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ type MetricsInPrometheus struct {

InUseMetricCounts []MetricCount `json:"in_use_metric_counts"`
AdditionalMetricCounts []MetricCount `json:"additional_metric_counts"`

Errors []string `json:"errors"`
}

type MetricCount struct {
Expand Down
14 changes: 11 additions & 3 deletions pkg/mimirtool/commands/analyse_prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package commands
import (
"context"
"encoding/json"
"fmt"
"os"
"sort"
"time"
Expand Down Expand Up @@ -98,15 +99,18 @@ func (cmd *PrometheusAnalyzeCommand) run(k *kingpin.ParseContext) error {
jobCount map[string]int
}{}
inUseCardinality := 0

var errorMetrics []string
for _, metric := range metricsUsed {
ctx, cancel := context.WithTimeout(context.Background(), cmd.readTimeout)
defer cancel()

query := "count by (job) (" + metric + ")"
result, _, err := v1api.Query(ctx, query, time.Now())
if err != nil {
return errors.Wrap(err, "error querying "+query)
errStr := fmt.Sprintf("skipped %s analysis because failed to run query %v: %s", metric, query, err.Error())
log.Warnln(errStr)
errorMetrics = append(errorMetrics, errStr)
continue
}

vec := result.(model.Vector)
Expand Down Expand Up @@ -155,7 +159,10 @@ func (cmd *PrometheusAnalyzeCommand) run(k *kingpin.ParseContext) error {
query := "count by (job) (" + metric + ")"
result, _, err := v1api.Query(ctx, query, time.Now())
if err != nil {
return errors.Wrap(err, "error querying "+query)
errStr := fmt.Sprintf("skipped %s analysis because failed to run query %v: %s", metric, query, err.Error())
log.Warnln(errStr)
errorMetrics = append(errorMetrics, errStr)
continue
}

vec := result.(model.Vector)
Expand Down Expand Up @@ -191,6 +198,7 @@ func (cmd *PrometheusAnalyzeCommand) run(k *kingpin.ParseContext) error {
output.TotalActiveSeries = inUseCardinality + additionalMetricsCardinality
output.InUseActiveSeries = inUseCardinality
output.AdditionalActiveSeries = additionalMetricsCardinality
output.Errors = errorMetrics

for metric, counts := range inUseMetrics {
jobCounts := make([]analyze.JobCount, 0, len(counts.jobCount))
Expand Down

0 comments on commit 2ef7544

Please sign in to comment.