From a95ee0fa2d8e15f613a30180b592b6d7717922e7 Mon Sep 17 00:00:00 2001
From: Tom Wilkie <tom.wilkie@gmail.com>
Date: Thu, 6 Jan 2022 06:38:50 -0800
Subject: [PATCH] Add "analyse queries" command to report the labels used by
 each metric, and not just the metrics used.

Signed-off-by: Tom Wilkie <tom.wilkie@gmail.com>
---
 pkg/commands/analyse.go              | 12 ++++
 pkg/commands/analyse_queries.go      | 95 ++++++++++++++++++++++++++++
 pkg/commands/analyse_queries_test.go | 76 ++++++++++++++++++++++
 3 files changed, 183 insertions(+)
 create mode 100644 pkg/commands/analyse_queries.go
 create mode 100644 pkg/commands/analyse_queries_test.go

diff --git a/pkg/commands/analyse.go b/pkg/commands/analyse.go
index 4b7b987c6..3c9688931 100644
--- a/pkg/commands/analyse.go
+++ b/pkg/commands/analyse.go
@@ -1,6 +1,10 @@
 package commands
 
 import (
+	"encoding/json"
+	"os"
+
+	log "github.com/sirupsen/logrus"
 	"gopkg.in/alecthomas/kingpin.v2"
 )
 
@@ -91,4 +95,12 @@ func (cmd *AnalyseCommand) Register(app *kingpin.Application) {
 	ruleFileAnalyseCmd.Flag("output", "The path for the output file").
 		Default("metrics-in-ruler.json").
 		StringVar(&rfCmd.outputFile)
+
+	analyseCmd.Command("queries", "Extract the used metrics and labels from queries fed in on stdin.").Action(func(_ *kingpin.ParseContext) error {
+		metrics, err := processQueries(os.Stdin)
+		if err != nil {
+			log.Fatalf("failed to process queries: %v", err)
+		}
+		return json.NewEncoder(os.Stdout).Encode(metrics)
+	})
 }
diff --git a/pkg/commands/analyse_queries.go b/pkg/commands/analyse_queries.go
new file mode 100644
index 000000000..57430138a
--- /dev/null
+++ b/pkg/commands/analyse_queries.go
@@ -0,0 +1,95 @@
+package commands
+
+import (
+	"bufio"
+	"io"
+	"sort"
+
+	"github.com/prometheus/prometheus/pkg/labels"
+	"github.com/prometheus/prometheus/promql/parser"
+)
+
+type MetricUsage struct {
+	LabelsUsed []string
+}
+
+func processQueries(r io.Reader) (map[string]MetricUsage, error) {
+	metrics := map[string]MetricUsage{}
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		if err := processQuery(scanner.Text(), metrics); err != nil {
+			return nil, err
+		}
+	}
+
+	return metrics, scanner.Err()
+}
+
+func processQuery(query string, metrics map[string]MetricUsage) error {
+	expr, err := parser.ParseExpr(query)
+	if err != nil {
+		return err
+	}
+
+	parser.Inspect(expr, func(node parser.Node, path []parser.Node) error {
+		vs, ok := node.(*parser.VectorSelector)
+		if !ok {
+			return nil
+		}
+
+		metricName, ok := getName(vs.LabelMatchers)
+		if !ok {
+			return nil
+		}
+
+		usedLabels := metrics[metricName]
+
+		// Add any label names from the selectors to the list of used labels.
+		for _, matcher := range vs.LabelMatchers {
+			if matcher.Name == labels.MetricName {
+				continue
+			}
+			setInsert(matcher.Name, &usedLabels.LabelsUsed)
+		}
+
+		// Find any aggregations in the path and add grouping labels.
+		for _, node := range path {
+			ae, ok := node.(*parser.AggregateExpr)
+			if !ok {
+				continue
+			}
+
+			for _, label := range ae.Grouping {
+				setInsert(label, &usedLabels.LabelsUsed)
+			}
+		}
+		metrics[metricName] = usedLabels
+
+		return nil
+	})
+
+	return nil
+}
+
+func getName(matchers []*labels.Matcher) (string, bool) {
+	for _, matcher := range matchers {
+		if matcher.Name == labels.MetricName && matcher.Type == labels.MatchEqual {
+			return matcher.Value, true
+		}
+	}
+	return "", false
+}
+
+func setInsert(label string, labels *[]string) {
+	i := sort.Search(len(*labels), func(i int) bool { return (*labels)[i] >= label })
+	if i < len(*labels) && (*labels)[i] == label {
+		// label is present at labels[i]
+		return
+	}
+
+	// label is not present in labels,
+	// but i is the index where it would be inserted.
+	*labels = append(*labels, "")
+	copy((*labels)[i+1:], (*labels)[i:])
+	(*labels)[i] = label
+}
diff --git a/pkg/commands/analyse_queries_test.go b/pkg/commands/analyse_queries_test.go
new file mode 100644
index 000000000..090bdf11e
--- /dev/null
+++ b/pkg/commands/analyse_queries_test.go
@@ -0,0 +1,76 @@
+package commands
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestSetInsert(t *testing.T) {
+	for _, tc := range []struct {
+		initial  []string
+		value    string
+		expected []string
+	}{
+		{
+			initial:  []string{},
+			value:    "foo",
+			expected: []string{"foo"},
+		},
+		{
+			initial:  []string{"foo"},
+			value:    "foo",
+			expected: []string{"foo"},
+		},
+		{
+			initial:  []string{"foo"},
+			value:    "bar",
+			expected: []string{"bar", "foo"},
+		},
+		{
+			initial:  []string{"bar"},
+			value:    "foo",
+			expected: []string{"bar", "foo"},
+		},
+		{
+			initial:  []string{"bar", "foo"},
+			value:    "bar",
+			expected: []string{"bar", "foo"},
+		},
+	} {
+		setInsert(tc.value, &tc.initial)
+		require.Equal(t, tc.initial, tc.expected)
+	}
+}
+
+func TestProcessQuery(t *testing.T) {
+	for _, tc := range []struct {
+		query    string
+		expected map[string]MetricUsage
+	}{
+		{
+			query: `sum(rate(requests_total{status=~"5.."}[5m])) / sum(rate(requests_total[5m]))`,
+			expected: map[string]MetricUsage{
+				"requests_total": {LabelsUsed: []string{"status"}},
+			},
+		},
+		{
+			query: `sum(rate(requests_sum[5m])) / sum(rate(requests_total[5m]))`,
+			expected: map[string]MetricUsage{
+				"requests_total": {LabelsUsed: nil},
+				"requests_sum":   {LabelsUsed: nil},
+			},
+		},
+		{
+			query: `sum by (path) (rate(requests_total{status=~"5.."}[5m]))`,
+			expected: map[string]MetricUsage{
+				"requests_total": {LabelsUsed: []string{"path", "status"}},
+			},
+		},
+	} {
+		actual := map[string]MetricUsage{}
+		err := processQuery(tc.query, actual)
+		require.NoError(t, err)
+		require.Equal(t, tc.expected, actual)
+	}
+}