-
Notifications
You must be signed in to change notification settings - Fork 486
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
29a4b7c
commit 247728d
Showing
7 changed files
with
312 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Benchmark notes | ||
|
||
These are synthetic benchmarks meant to represent common workloads. These are not meant to be exhaustive or fine grained. | ||
These will give a coarse idea of how the agent behaves in a given situation. | ||
|
||
## Running the benchmarks | ||
|
||
Running `PROM_USERNAME="" PROM_PASSWORD="" ./benchmark.sh` will start the benchmark and run for 8 hours. The duration and type of tests | ||
can be adjusted by editing the `benchmark.sh` file. This will start two Agents and the benchmark runner. Relevant CPU and memory metrics | ||
will be sent to the endpoint described in `normal.river`. | ||
|
||
TODO: Add mixin for graph I am using | ||
|
||
## Adjusting the benchmark | ||
|
||
Each benchmark can be adjusted within `test.river`. These settings allow fine tuning to a specific scenario. Each `prometheus.test.metrics` component | ||
exposes a service discovery URL that is used to collect the targets. | ||
|
||
## Benchmark categories | ||
|
||
### prometheus.test.metrics "single" | ||
|
||
This roughly represents a single node exporter and is the simplest use case. Every `10m` (`metrics_refresh`), 5% of the metrics are replaced, as set by `churn_percent`. | ||
|
||
### prometheus.test.metrics "many" | ||
|
||
This roughly represents scraping many node_exporter instances in say a Kubernetes environment. | ||
|
||
### prometheus.test.metrics "large" | ||
|
||
This represents scraping 2 very large instances with 1,000,000 series. | ||
|
||
### prometheus.test.metrics "churn" | ||
|
||
This represents a worst case scenario, 2 large instances with an extremely high churn rate. | ||
|
||
## Adjusting the tests | ||
|
||
`prometheus.relabel` is often a CPU bottleneck so adding additional rules allows you to test the impact of that. | ||
|
||
## Rules | ||
|
||
Existing relabel rules ensure that only the specific metrics that matter are sent to the Prometheus remote write endpoint. These are tagged with the `runtype` and the benchmark name, for instance `normal-large`. | ||
|
||
The benchmark starts an endpoint to consume the metrics from `prometheus.test.metrics`, in half the tests it will return HTTP Status 200 and in the other half will return 500. | ||
|
||
TODO add optional pyroscope profiles |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/sh
# Builds the benchmark runner and executes every benchmark category twice:
# once with the metrics endpoint accepting writes and once with it rejecting them.

# Abort if the runner does not build; otherwise a stale (or missing) ./main
# binary would be executed below.
go build ./main.go || exit 1

# Each test is run with four arguments: the name of the test, whether the
# endpoint accepts metrics, the duration of the run and the discovery endpoint.
# See test.river for details on each endpoint.
for name in churn single many large; do
    for accept in true false; do
        ./main "$name" "$accept" 1h "http://127.0.0.1:9001/api/v0/component/prometheus.test.metrics.${name}/discovery"
    done
done
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"net/http" | ||
"os" | ||
"os/exec" | ||
"strconv" | ||
"syscall" | ||
"time" | ||
|
||
"github.com/gorilla/mux" | ||
) | ||
|
||
// main handles creating the benchmark. | ||
func main() { | ||
username := os.Getenv("PROM_USERNAME") | ||
if username == "" { | ||
panic("PROM_USERNAME env must be set") | ||
} | ||
password := os.Getenv("PROM_PASSWORD") | ||
if password == "" { | ||
panic("PROM_PASSWORD env must be set") | ||
} | ||
|
||
// Start the HTTP server, that can swallow requests. | ||
go httpServer() | ||
// Build the agent | ||
buildAgent() | ||
|
||
name := os.Args[1] | ||
allowWal := os.Args[2] | ||
duration := os.Args[3] | ||
discovery := os.Args[4] | ||
allowWalBool, _ := strconv.ParseBool(allowWal) | ||
parsedDuration, _ := time.ParseDuration(duration) | ||
fmt.Println(name, allowWalBool, parsedDuration, discovery) | ||
startRun(name, allowWalBool, parsedDuration, discovery) | ||
|
||
} | ||
|
||
func startRun(name string, allowWAL bool, run time.Duration, discovery string) { | ||
os.RemoveAll("./data/normal-data") | ||
os.RemoveAll("./data/test-data") | ||
|
||
allow = allowWAL | ||
_ = os.Setenv("NAME", name) | ||
_ = os.Setenv("ALLOW_WAL", strconv.FormatBool(allowWAL)) | ||
_ = os.Setenv("DISCOVERY", discovery) | ||
|
||
metric := startMetricsAgent() | ||
fmt.Println("starting metric agent") | ||
defer metric.Process.Kill() | ||
defer metric.Process.Release() | ||
defer metric.Wait() | ||
defer syscall.Kill(-metric.Process.Pid, syscall.SIGKILL) | ||
defer os.RemoveAll("./data/test-data") | ||
|
||
old := startNormalAgent() | ||
fmt.Println("starting normal agent") | ||
defer old.Process.Kill() | ||
defer old.Process.Release() | ||
defer old.Wait() | ||
defer syscall.Kill(-old.Process.Pid, syscall.SIGKILL) | ||
defer os.RemoveAll("./data/normal-data") | ||
|
||
time.Sleep(run) | ||
} | ||
|
||
// buildAgent runs `go build ../grafana-agent-flow`, streaming the build output
// to this process's stdout and stderr. It panics with the error text when the
// build fails.
func buildAgent() {
	build := exec.Command("go", "build", "../grafana-agent-flow")
	build.Stdout, build.Stderr = os.Stdout, os.Stderr
	// Run the build in its own process group.
	build.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	if err := build.Run(); err != nil {
		panic(err.Error())
	}
}
|
||
// startNormalAgent launches ./grafana-agent-flow with normal.river, storing
// its data in ./data/normal-data and listening on 127.0.0.1:12346. The process
// is started in its own process group and is not waited for. It panics when
// the process cannot be started.
func startNormalAgent() *exec.Cmd {
	args := []string{
		"run",
		"./normal.river",
		"--storage.path=./data/normal-data",
		"--server.http.listen-addr=127.0.0.1:12346",
	}
	agent := exec.Command("./grafana-agent-flow", args...)
	agent.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	// Uncomment to forward the agent's output to this process:
	//agent.Stdout = os.Stdout
	//agent.Stderr = os.Stderr

	if err := agent.Start(); err != nil {
		panic(err.Error())
	}
	return agent
}
|
||
// startMetricsAgent launches ./grafana-agent-flow with test.river, storing its
// data in ./data/test-data and listening on 127.0.0.1:9001. The process is
// started in its own process group and is not waited for. It panics when the
// process cannot be started.
func startMetricsAgent() *exec.Cmd {
	args := []string{
		"run",
		"./test.river",
		"--storage.path=./data/test-data",
		"--server.http.listen-addr=127.0.0.1:9001",
	}
	agent := exec.Command("./grafana-agent-flow", args...)
	agent.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	if err := agent.Start(); err != nil {
		panic(err.Error())
	}
	return agent
}
|
||
// allow selects how the /post endpoint answers: when true the request is
// accepted, when false it is rejected with a 500. It is set by startRun and
// by the /allow and /block handlers.
// NOTE(review): it is read and written from the HTTP server goroutine and from
// startRun without synchronization — confirm whether that matters here.
var allow bool
|
||
func httpServer() { | ||
r := mux.NewRouter() | ||
r.HandleFunc("/post", func(w http.ResponseWriter, r *http.Request) { | ||
handlePost(w, r) | ||
}) | ||
r.HandleFunc("/allow", func(w http.ResponseWriter, r *http.Request) { | ||
println("allowing") | ||
allow = true | ||
}) | ||
r.HandleFunc("/block", func(w http.ResponseWriter, r *http.Request) { | ||
println("blocking") | ||
allow = false | ||
}) | ||
http.Handle("/", r) | ||
println("Starting server") | ||
err := http.ListenAndServe(":8888", nil) | ||
if err != nil { | ||
println(err) | ||
} | ||
} | ||
|
||
func handlePost(w http.ResponseWriter, r *http.Request) { | ||
if allow { | ||
return | ||
} else { | ||
println("returning 500") | ||
w.WriteHeader(500) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
|
||
|
||
// Agent logging configuration: log at debug level.
logging {
	level = "debug"
}
|
||
|
||
// Discovers scrape targets from the URL held in the DISCOVERY environment
// variable.
discovery.http "disco" {
	url = env("DISCOVERY")
}
|
||
|
||
// Scrapes localhost:12346 every 60s and hands the samples to the "mutator"
// relabel component.
prometheus.scrape "scraper" {
	scrape_interval = "60s"
	targets         = concat([{"__address__" = "localhost:12346"}])
	forward_to      = [prometheus.relabel.mutator.receiver]
}
|
||
|
||
// Tags every series with the details of the current run and then keeps only
// the series listed in the final rule before forwarding them to the
// "agent_stats" remote write.
prometheus.relabel "mutator" {
	forward_to = [prometheus.remote_write.agent_stats.receiver]

	// runtype: constant "normal".
	rule {
		source_labels = ["__name__"]
		regex         = "(.+)"
		target_label  = "runtype"
		replacement   = "normal"
	}

	// test_name: value of the NAME environment variable.
	rule {
		source_labels = ["__name__"]
		regex         = "(.+)"
		target_label  = "test_name"
		replacement   = env("NAME")
	}

	// remote_write_enable: value of the ALLOW_WAL environment variable.
	rule {
		source_labels = ["__name__"]
		regex         = "(.+)"
		target_label  = "remote_write_enable"
		replacement   = env("ALLOW_WAL")
	}

	// discovery: value of the DISCOVERY environment variable.
	rule {
		source_labels = ["__name__"]
		regex         = "(.+)"
		target_label  = "discovery"
		replacement   = env("DISCOVERY")
	}

	// Keep only the series whose name matches the list below.
	rule {
		action        = "keep"
		source_labels = ["__name__"]
		regex         = "(agent_wal_storage_active_series|agent_resources_process_cpu_seconds_total|go_memstats_alloc_bytes|go_gc_duration_seconds_sum|go_gc_duration_seconds_count)"
	}
}
|
||
// Pushes the relabelled agent statistics to the remote Prometheus endpoint,
// authenticating with the PROM_USERNAME / PROM_PASSWORD environment variables.
prometheus.remote_write "agent_stats" {
	endpoint {
		url = "https://prometheus-us-central1.grafana.net/api/prom/push"

		basic_auth {
			username = env("PROM_USERNAME")
			password = env("PROM_PASSWORD")
		}
	}
}
|
||
// Scrapes the targets found by discovery.http "disco" every 60s and forwards
// the samples to the "empty" remote write.
prometheus.scrape "data" {
	scrape_interval = "60s"
	targets         = discovery.http.disco.targets
	forward_to      = [prometheus.remote_write.empty.receiver]
}
|
||
// Pushes the scraped benchmark metrics to the local endpoint on port 8888.
prometheus.remote_write "empty" {
	endpoint {
		url = "http://localhost:8888/post"
	}
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
// Mimics a single node_exporter instance: 1 instance, 2000 metrics with
// 5 labels, refreshed every 10m with a churn_percent of 0.05.
prometheus.test.metrics "single" {
	number_of_instances = 1
	number_of_metrics   = 2000
	number_of_labels    = 5
	churn_percent       = 0.05
	metrics_refresh     = "10m"
}
|
||
// Mimics many node_exporter instances: 1000 instances, 2000 metrics with
// 5 labels, refreshed every 10m with a churn_percent of 0.05.
prometheus.test.metrics "many" {
	number_of_instances = 1000
	number_of_metrics   = 2000
	number_of_labels    = 5
	churn_percent       = 0.05
	metrics_refresh     = "10m"
}
|
||
// Mimics scraping very large instances: 2 instances, 1000000 metrics with
// 9 labels, refreshed every 10m with a churn_percent of 0.05.
prometheus.test.metrics "large" {
	number_of_instances = 2
	number_of_metrics   = 1000000
	number_of_labels    = 9
	churn_percent       = 0.05
	metrics_refresh     = "10m"
}
|
||
// High-churn scenario: 2 instances, 200000 metrics with 12 labels, refreshed
// every 10m with a churn_percent of 0.50.
prometheus.test.metrics "churn" {
	number_of_instances = 2
	number_of_metrics   = 200000
	number_of_labels    = 12
	churn_percent       = 0.50
	metrics_refresh     = "10m"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
module github.com/grafana/agent | ||
|
||
go 1.21.0 | ||
go 1.21 | ||
|
||
require ( | ||
cloud.google.com/go/pubsub v1.33.0 | ||
|