Added mtypes CLI for generating realistic avalanche metric type distributions.

Initially added in bwplotka/prombenchy#12, but it might belong here more. Signed-off-by: bwplotka <[email protected]>
prometheus-community · Oct 8, 2024 · e75bb9f · e75bb9f
1 parent 5bc0599
commit e75bb9f
Show file tree

Hide file tree

Showing 9 changed files with 1,360 additions and 21 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1 +1,3 @@
-avalanche
+./avalanche
+.build/
+.idea/
diff --git a/Dockerfile b/Dockerfile
@@ -6,6 +6,7 @@ LABEL maintainer="The Prometheus Authors <[email protected]
 ARG ARCH="amd64"
 ARG OS="linux"
 COPY .build/${OS}-${ARCH}/avalanche /bin/avalanche
+COPY .build/${OS}-${ARCH}/mtypes /bin/mtypes
 
 EXPOSE      9101
 USER        nobody

diff --git a/README.md b/README.md
@@ -9,29 +9,41 @@ This allows load testing services that can scrape (e.g. Prometheus, OpenTelemetr
 
 Metric names and unique series change over time to simulate series churn.
 
-Checkout the [blog post](https://blog.freshtracks.io/load-testing-prometheus-metric-ingestion-5b878711711c).
+Check out the (old-ish) [blog post](https://blog.freshtracks.io/load-testing-prometheus-metric-ingestion-5b878711711c).
 
-## configuration flags
+## Installing
+
+### Locally
 
 ```bash
-avalanche --help
+go install github.com/prometheus-community/avalanche/cmd/avalanche@latest
+${GOPATH}/bin/avalanche --help
 ```
 
-## run Docker image
+### Docker 
 
 ```bash
-docker run quay.io/prometheuscommunity/avalanche:main --help
+docker run quay.io/prometheuscommunity/avalanche:latest --help
 ```
 
-## Endpoints
+NOTE: We recommend pinning the image to a specific version (see all tags [here](https://quay.io/repository/prometheuscommunity/avalanche?tab=tags&tag=latest)).
+
+## Using
+
+See [example](example/kubernetes-deployment.yaml) k8s manifest for deploying avalanche as an always running scrape target.
+
+### Configuration
+
+See `--help` for all flags and their documentation.
+
+Notably, since version 0.6.0, `avalanche` allows specifying separate counts for the various metric types.
+
+You can choose your own distribution, but usually it makes more sense to mimic the realistic distribution used by your example targets. Feel free to use the [handy `mtypes` Go CLI](./cmd/mtypes) to gather type distributions from a target and generate avalanche flags from it.
+
+On top of scrape target functionality, avalanche is capable of Remote Write client load simulation, following the same, configured metric distribution via `--remote*` flags.
+
+### Endpoints
 
 Two endpoints are available :
 * `/metrics` - metrics endpoint
 * `/health` - healthcheck endpoint
-
-## build and run go binary
-
-```bash
-go install github.com/prometheus-community/avalanche/cmd@latest
-go/bin/cmd --help
-```
diff --git a/cmd/avalanche.go → cmd/avalanche/avalanche.go b/cmd/avalanche.go → cmd/avalanche/avalanche.go
diff --git a/cmd/mtypes/README.md b/cmd/mtypes/README.md
@@ -0,0 +1,50 @@
+# mtypes
+
+Go CLI gathering statistics around the distribution of types, average number of buckets (and more) across your Prometheus metrics/series.
+
+## Usage
+
+The main usage takes a resource (from stdin, a file or an HTTP /metrics endpoint) and calculates type statistics, e.g.:
+
+```bash
+$ mtypes -resource=http://localhost:9090/metrics
+$ mtypes -resource=./metrics.prometheus.txt
+$ cat ./metrics.prometheus.txt | mtypes
+```
+
+```bash 
+Metric Type    Metric Families    Series    Series %     Series % (complex type adjusted)    Average Buckets/Objectives
+GAUGE          77                 94        30.618893    15.112540                           -
+COUNTER        104                167       54.397394    26.848875                           -
+HISTOGRAM      11                 19        6.188925     39.710611                           11.000000
+SUMMARY        15                 27        8.794788     18.327974                           2.222222
+```
+
+> NOTE: "Adjusted" series, means actual number of individual series stored in Prometheus. Classic histograms and summaries are stored as a set of counters. This is relevant as the cost of indexing new series is higher than storing complex values (this is why we slowly move to native histograms).
+
+Additionally, you can pass `--avalanche-flags-for-adjusted-series=10000` to print Avalanche v0.6.0+ flags that configure avalanche to generate a metric target with the given number of adjusted series, while maintaining a similar distribution, e.g.:
+
+```bash
+cat ../../manifests/load/exampleprometheustarget.txt | go run main.go --avalanche-flags-for-adjusted-series=10000
+Metric Type    Metric Families    Series (adjusted)    Series (adjusted) %        Average Buckets/Objectives
+GAUGE          77                 94 (94)              30.921053 (15.719064)      -
+COUNTER        104                166 (166)            54.605263 (27.759197)      -
+HISTOGRAM      11                 17 (224)             5.592105 (37.458194)       11.176471
+SUMMARY        15                 27 (114)             8.881579 (19.063545)       2.222222
+---            ---                ---                  ---                        ---
+*              207                304 (598)            100.000000 (100.000000)    -
+
+Avalanche flags for the similar distribution to get to the adjusted series goal of: 10000
+--gauge-metric-count=157
+--counter-metric-count=277
+--histogram-metric-count=28
+--histogram-metric-bucket-count=10
+--native-histogram-metric-count=0
+--summary-metric-count=47
+--summary-metric-objective-count=2
+--series-count=10
+--value-interval=300 # Changes values every 5m.
+--series-interval=3600 # 1h series churn.
+--metric-interval=0
+This should give the total adjusted series to: 9860
+```
diff --git a/cmd/mtypes/main.go b/cmd/mtypes/main.go
@@ -0,0 +1,225 @@
+// Package main implements mtypes CLI, see README for details.
+package main
+
+import (
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"os"
+	"strings"
+	"text/tabwriter"
+
+	dto "github.com/prometheus/client_model/go"
+	"github.com/prometheus/common/expfmt"
+)
+
+// stats holds the counters accumulated for one metric type (or for the total
+// across all types).
+type stats struct {
+	// families is the number of metric families counted.
+	families int
+	// series is the number of individual metrics counted.
+	series int
+	// buckets is the summed number of histogram buckets across series.
+	buckets int
+	// objectives is the summed number of summary quantiles across series.
+	objectives int
+
+	// adjustedSeries represents series that would result in "series" in Prometheus data model
+	// (includes _bucket, _count, _sum, _quantile).
+	adjustedSeries int
+}
+
+// metricType_NATIVE_HISTOGRAM is a synthetic metric type value used by this
+// tool as the key under which native histograms are tracked separately.
+var metricType_NATIVE_HISTOGRAM = dto.MetricType(999)
+
+// main reads metrics in the Prometheus format from stdin or from -resource
+// (a file path or an http/https URL), prints per-type statistics to stdout and,
+// when -avalanche-flags-for-adjusted-series is greater than zero, additionally
+// prints avalanche flags that approximate the same type distribution for the
+// requested number of adjusted series.
+func main() {
+	resource := flag.String("resource", "", "Path or URL to the resource (file, <url>/metrics) containing Prometheus metric format.")
+	avalancheFlagsForTotal := flag.Int("avalanche-flags-for-adjusted-series", 0, "If more than zero, it additionally prints flags for the avalanche 0.6.0 command line to generate metrics for the similar type distribution; to get the total number of adjusted series to the given value.")
+	flag.Parse()
+
+	// Default to stdin; -resource switches to an HTTP response body or a file.
+	var input io.Reader = os.Stdin
+	if *resource != "" {
+		switch {
+		case strings.HasPrefix(*resource, "https://"), strings.HasPrefix(*resource, "http://"):
+			if _, err := url.Parse(*resource); err != nil {
+				log.Fatalf("error parsing HTTP URL to the resource %v; got %v", *resource, err)
+			}
+			resp, err := http.Get(*resource)
+			if err != nil {
+				log.Fatalf("http get against %v failed: %v", *resource, err)
+			}
+			// Refuse non-200 responses: their body is an error page, not metrics.
+			// The body is closed explicitly because log.Fatalf skips deferred calls.
+			if resp.StatusCode != http.StatusOK {
+				_ = resp.Body.Close()
+				log.Fatalf("http get against %v returned unexpected status %v", *resource, resp.Status)
+			}
+			defer resp.Body.Close()
+			input = resp.Body
+		default:
+			// Open the input file.
+			file, err := os.Open(*resource)
+			if err != nil {
+				log.Fatalf("Error opening file: %v", err) //nolint:gocritic
+			}
+			defer file.Close()
+			input = file
+		}
+	}
+	statistics, err := calculateTargetStatistics(input)
+	if err != nil {
+		log.Fatal(err)
+	}
+	var total stats
+	for _, s := range statistics {
+		total.families += s.families
+		total.series += s.series
+		total.adjustedSeries += s.adjustedSeries
+	}
+
+	writeStatistics(os.Stdout, total, statistics)
+
+	if *avalancheFlagsForTotal <= 0 {
+		return
+	}
+	// Without any series there is no distribution to mimic; the ratios below
+	// would be 0/0.
+	if total.adjustedSeries == 0 {
+		log.Fatal("no series found in the input; cannot derive avalanche flags")
+	}
+
+	// adjustedGoal is tracking the # of adjusted series we want to generate with avalanche.
+	adjustedGoal := float64(*avalancheFlagsForTotal)
+	fmt.Println()
+	fmt.Println("Avalanche flags for the similar distribution to get to the adjusted series goal of:", adjustedGoal)
+
+	adjustedGoal /= 10.0 // Assuming --series-count=10
+	// adjustedSum is tracking the total sum of series so far (at the end hopefully adjustedSum ~= adjustedGoal)
+	adjustedSum := 0
+	for _, mtype := range allTypes {
+		s := statistics[mtype]
+
+		// adjustedSeriesRatio is tracking the ratio of this type in the input file.
+		// We try to get similar ratio, but with different absolute counts, given the total sum of series we are aiming for.
+		adjustedSeriesRatio := float64(s.adjustedSeries) / float64(total.adjustedSeries)
+
+		// adjustedSeriesForType is tracking (per metric type), how many unique series of that
+		// metric type avalanche needs to create according to the ratio we got from our input.
+		adjustedSeriesForType := int(adjustedGoal * adjustedSeriesRatio)
+
+		switch mtype {
+		case dto.MetricType_GAUGE:
+			fmt.Printf("--gauge-metric-count=%v\n", adjustedSeriesForType)
+			adjustedSum += adjustedSeriesForType
+		case dto.MetricType_COUNTER:
+			fmt.Printf("--counter-metric-count=%v\n", adjustedSeriesForType)
+			adjustedSum += adjustedSeriesForType
+		case dto.MetricType_HISTOGRAM:
+			if s.series == 0 {
+				// No classic histograms in the input: there is no average to
+				// compute and the integer division below would panic. The bucket
+				// count flag is left out as it is irrelevant for zero metrics.
+				fmt.Println("--histogram-metric-count=0")
+				continue
+			}
+			avgBkts := s.buckets / s.series
+			adjustedSeriesForType /= 2 + avgBkts
+			// -1 is due to caveat of additional +Inf not added by avalanche;
+			// clamp so a bucket-less histogram never yields a negative flag value.
+			bucketCount := avgBkts - 1
+			if bucketCount < 0 {
+				bucketCount = 0
+			}
+			fmt.Printf("--histogram-metric-count=%v\n", adjustedSeriesForType)
+			fmt.Printf("--histogram-metric-bucket-count=%v\n", bucketCount)
+			adjustedSum += adjustedSeriesForType * (2 + avgBkts)
+		case metricType_NATIVE_HISTOGRAM:
+			fmt.Printf("--native-histogram-metric-count=%v\n", adjustedSeriesForType)
+			adjustedSum += adjustedSeriesForType
+		case dto.MetricType_SUMMARY:
+			if s.series == 0 {
+				// Same guard as for histograms: avoid dividing by zero series.
+				fmt.Println("--summary-metric-count=0")
+				continue
+			}
+			avgObjs := s.objectives / s.series
+			adjustedSeriesForType /= 2 + avgObjs
+			fmt.Printf("--summary-metric-count=%v\n", adjustedSeriesForType)
+			fmt.Printf("--summary-metric-objective-count=%v\n", avgObjs)
+			adjustedSum += adjustedSeriesForType * (2 + avgObjs)
+		default:
+			if s.series > 0 {
+				log.Fatalf("not supported %v metric in avalanche", mtype)
+			}
+		}
+	}
+	fmt.Printf("--series-count=10\n")
+	fmt.Printf("--value-interval=300 # Changes values every 5m.\n")
+	fmt.Printf("--series-interval=3600 # 1h series churn.\n")
+	fmt.Printf("--metric-interval=0\n")
+
+	// Multiply back by the assumed --series-count=10.
+	fmt.Println("This should give the total adjusted series to:", adjustedSum*10)
+}
+
+var allTypes = []dto.MetricType{dto.MetricType_GAUGE, dto.MetricType_COUNTER, dto.MetricType_HISTOGRAM, metricType_NATIVE_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM, dto.MetricType_SUMMARY, dto.MetricType_UNTYPED}
+
+// writeStatistics renders a tab-aligned table to writer with one row per
+// metric type present in statistics (visited in allTypes order), followed by a
+// separator row and a totals row built from total.
+func writeStatistics(writer io.Writer, total stats, statistics map[dto.MetricType]stats) {
+	// percent returns part as a percentage of whole.
+	percent := func(part, whole int) float64 {
+		return 100 * float64(part) / float64(whole)
+	}
+
+	tw := tabwriter.NewWriter(writer, 0, 0, 4, ' ', 0)
+	fmt.Fprintln(tw, "Metric Type\tMetric Families\tSeries (adjusted)\tSeries (adjusted) %\tAverage Buckets/Objectives")
+
+	for _, mtype := range allTypes {
+		row, present := statistics[mtype]
+		if !present {
+			continue
+		}
+
+		// The synthetic native histogram type gets a hand-written label.
+		label := mtype.String()
+		if mtype == metricType_NATIVE_HISTOGRAM {
+			label = "HISTOGRAM (native)"
+		}
+
+		seriesPct := percent(row.series, total.series)
+		adjustedPct := percent(row.adjustedSeries, total.adjustedSeries)
+
+		// The last column shows average buckets, else average objectives, else "-".
+		if row.buckets > 0 {
+			avgBuckets := float64(row.buckets) / float64(row.series)
+			fmt.Fprintf(tw, "%s\t%d\t%d (%d)\t%f (%f)\t%f\n", label, row.families, row.series, row.adjustedSeries, seriesPct, adjustedPct, avgBuckets)
+			continue
+		}
+		if row.objectives > 0 {
+			avgObjectives := float64(row.objectives) / float64(row.series)
+			fmt.Fprintf(tw, "%s\t%d\t%d (%d)\t%f (%f)\t%f\n", label, row.families, row.series, row.adjustedSeries, seriesPct, adjustedPct, avgObjectives)
+			continue
+		}
+		fmt.Fprintf(tw, "%s\t%d\t%d (%d)\t%f (%f)\t-\n", label, row.families, row.series, row.adjustedSeries, seriesPct, adjustedPct)
+	}
+
+	fmt.Fprintf(tw, "---\t---\t---\t---\t---\n")
+	fmt.Fprintf(tw, "*\t%d\t%d (%d)\t%f (%f)\t-\n", total.families, total.series, total.adjustedSeries, 100.0, 100.0)
+	// The flush error is deliberately ignored, matching the best-effort output above.
+	_ = tw.Flush()
+}
+
+// calculateTargetStatistics decodes metric families from r until EOF and
+// returns the accumulated stats keyed by metric type. Histogram metrics with a
+// non-zero schema are accumulated separately under metricType_NATIVE_HISTOGRAM;
+// that entry is only stored when at least one such series was seen. Any decode
+// error other than EOF is returned wrapped, with a nil map.
+func calculateTargetStatistics(r io.Reader) (statistics map[dto.MetricType]stats, _ error) {
+	// Parse the Prometheus Text format.
+	dec := expfmt.NewDecoder(r, expfmt.NewFormat(expfmt.TypeProtoText))
+
+	statistics = make(map[dto.MetricType]stats)
+	var native stats
+	for {
+		var family dto.MetricFamily
+		err := dec.Decode(&family)
+		if errors.Is(err, io.EOF) {
+			break
+		}
+		if err != nil {
+			return nil, fmt.Errorf("parsing %w", err)
+		}
+
+		typ := family.GetType()
+		metrics := family.GetMetric()
+		acc := statistics[typ]
+
+		switch typ {
+		case dto.MetricType_GAUGE_HISTOGRAM, dto.MetricType_HISTOGRAM:
+			// A family is counted at most once per flavour (classic / native).
+			classicCounted, nativeCounted := false, false
+			for _, m := range metrics {
+				hist := m.GetHistogram()
+				if hist.GetSchema() != 0 {
+					// native one: a single adjusted series regardless of bucket count.
+					// NOTE(review): a schema of zero is treated as classic here — confirm
+					// that native histograms with schema 0 need not be detected.
+					native.series++
+					native.buckets += len(hist.GetNegativeDelta())
+					native.buckets += len(hist.GetNegativeCount())
+					native.buckets += len(hist.GetPositiveDelta())
+					native.buckets += len(hist.GetPositiveCount())
+					native.adjustedSeries++
+					if !nativeCounted {
+						native.families++
+						nativeCounted = true
+					}
+					continue
+				}
+
+				// classic one: every bucket plus two extra series are counted as adjusted.
+				bucketCount := len(hist.GetBucket())
+				acc.series++
+				acc.buckets += bucketCount
+				acc.adjustedSeries += 2 + bucketCount
+				if !classicCounted {
+					acc.families++
+					classicCounted = true
+				}
+			}
+		case dto.MetricType_SUMMARY:
+			acc.families++
+			acc.series += len(metrics)
+			for _, m := range metrics {
+				// Every quantile plus two extra series are counted as adjusted.
+				quantiles := len(m.GetSummary().GetQuantile())
+				acc.objectives += quantiles
+				acc.adjustedSeries += 2 + quantiles
+			}
+		default:
+			// Remaining types: one adjusted series per metric.
+			acc.families++
+			acc.series += len(metrics)
+			acc.adjustedSeries += len(metrics)
+		}
+		statistics[typ] = acc
+	}
+	if native.series > 0 {
+		statistics[metricType_NATIVE_HISTOGRAM] = native
+	}
+	return statistics, nil
+}