-
Notifications
You must be signed in to change notification settings - Fork 487
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
OMerge branch 'test_component' of github.com:grafana/agent into test_…
…component
Showing
15 changed files
with
790 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Benchmark notes | ||
|
||
These are synthetic benchmarks meant to represent common workloads. These are not meant to be exhaustive or fine grained. | ||
These will give a coarse idea of how the agent behaves in a given situation. | ||
|
||
## Running the benchmarks | ||
|
||
Running `PROM_USERNAME="" PROM_PASSWORD="" ./benchmark.sh` will start the benchmark and run for 8 hours. The duration and type of tests | ||
can be adjusted by editing the `benchmark.sh` file. This will start two Agents and the benchmark runner. Relevant CPU and memory metrics | ||
will be sent to the endpoint described in `normal.river`. | ||
|
||
TODO: Add mixin for graph I am using | ||
|
||
## Adjusting the benchmark | ||
|
||
Each benchmark can be adjusted within `test.river`. These settings allow fine tuning to a specific scenario. Each `prometheus.test.metrics` component | ||
exposes a service discovery URL that is used to collect the targets. | ||
|
||
## Benchmark categories | ||
|
||
### prometheus.test.metrics "single" | ||
|
||
This roughly represents a single node exporter and is the simplest use case. Every `10m`, 5% of the metrics are replaced, as controlled by `churn_percent`. | ||
|
||
### prometheus.test.metrics "many" | ||
|
||
This roughly represents scraping many node_exporter instances in say a Kubernetes environment. | ||
|
||
### prometheus.test.metrics "large" | ||
|
||
This represents scraping 2 very large instances with 1,000,000 series. | ||
|
||
### prometheus.test.metrics "churn" | ||
|
||
This represents a worst case scenario, 2 large instances with an extremely high churn rate. | ||
|
||
## Adjusting the tests | ||
|
||
`prometheus.relabel` is often a CPU bottleneck so adding additional rules allows you to test the impact of that. | ||
|
||
## Rules | ||
|
||
Existing rules ensure that only the specific metrics that matter are sent to the Prometheus remote write endpoint. These are tagged with the `runtype` and the benchmark name, for instance `normal-large`. | ||
|
||
The benchmark starts an endpoint to consume the metrics from `prometheus.test.metrics`, in half the tests it will return HTTP Status 200 and in the other half will return 500. | ||
|
||
TODO add optional pyroscope profiles |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
go build -o main | ||
|
||
# Each test is run with the following arguments: the benchmark type (metrics), the test name, whether the endpoint accepts metrics, the duration, and the discovery | ||
# endpoint. See test.river for details on each endpoint. | ||
./main metrics churn true 1h "http://127.0.0.1:9001/api/v0/component/prometheus.test.metrics.churn/discovery" | ||
./main metrics churn false 1h "http://127.0.0.1:9001/api/v0/component/prometheus.test.metrics.churn/discovery" | ||
|
||
./main metrics single true 1h "http://127.0.0.1:9001/api/v0/component/prometheus.test.metrics.single/discovery" | ||
./main metrics single false 1h "http://127.0.0.1:9001/api/v0/component/prometheus.test.metrics.single/discovery" | ||
|
||
./main metrics many true 1h "http://127.0.0.1:9001/api/v0/component/prometheus.test.metrics.many/discovery" | ||
./main metrics many false 1h "http://127.0.0.1:9001/api/v0/component/prometheus.test.metrics.many/discovery" | ||
|
||
./main metrics large true 1h "http://127.0.0.1:9001/api/v0/component/prometheus.test.metrics.large/discovery" | ||
./main metrics large false 1h "http://127.0.0.1:9001/api/v0/component/prometheus.test.metrics.large/discovery" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
"os/exec" | ||
"syscall" | ||
"time" | ||
) | ||
|
||
func startLogsRun(run time.Duration) { | ||
allow = true | ||
_ = os.MkdirAll("./data/", 0777) | ||
_ = os.RemoveAll("./data/") | ||
_ = os.Setenv("NAME", "logs") | ||
gen := startLogsGenAgent() | ||
old := startLogsAgent() | ||
fmt.Println("starting logs agent") | ||
defer func() { | ||
_ = old.Process.Kill() | ||
_ = old.Process.Release() | ||
_ = old.Wait() | ||
_ = syscall.Kill(-old.Process.Pid, syscall.SIGKILL) | ||
_ = gen.Process.Kill() | ||
_ = gen.Process.Release() | ||
_ = gen.Wait() | ||
_ = syscall.Kill(-gen.Process.Pid, syscall.SIGKILL) | ||
_ = os.RemoveAll("./data/") | ||
}() | ||
|
||
time.Sleep(run) | ||
} | ||
|
||
// startLogsAgent launches ./grafana-agent-flow with ./logs.river, storing its
// data under ./data/logs and serving HTTP on 127.0.0.1:12346. The child runs in
// its own process group and shares this process's stdout and stderr.
// It panics with the error text if the process cannot be started.
func startLogsAgent() *exec.Cmd {
	agent := exec.Command(
		"./grafana-agent-flow",
		"run",
		"./logs.river",
		"--storage.path=./data/logs",
		"--server.http.listen-addr=127.0.0.1:12346",
	)
	agent.Stdout, agent.Stderr = os.Stdout, os.Stderr
	agent.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	if err := agent.Start(); err != nil {
		panic(err.Error())
	}
	return agent
}
|
||
// startLogsGenAgent launches ./grafana-agent-flow with ./logsgen.river, storing
// its data under ./data/logs-gen and serving HTTP on 127.0.0.1:12349. The child
// runs in its own process group and shares this process's stdout and stderr.
// It panics with the error text if the process cannot be started.
func startLogsGenAgent() *exec.Cmd {
	generator := exec.Command(
		"./grafana-agent-flow",
		"run",
		"./logsgen.river",
		"--storage.path=./data/logs-gen",
		"--server.http.listen-addr=127.0.0.1:12349",
	)
	generator.Stdout, generator.Stderr = os.Stdout, os.Stderr
	generator.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	if err := generator.Start(); err != nil {
		panic(err.Error())
	}
	return generator
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
logging { | ||
level = "debug" | ||
} | ||
|
||
prometheus.scrape "scraper" { | ||
targets = concat([{"__address__" = "localhost:12346"}]) | ||
forward_to = [prometheus.relabel.mutator.receiver] | ||
scrape_interval = "60s" | ||
} | ||
|
||
prometheus.relabel "mutator" { | ||
rule { | ||
source_labels = ["__name__"] | ||
regex = "(.+)" | ||
replacement = "normal" | ||
target_label = "runtype" | ||
} | ||
|
||
rule { | ||
source_labels = ["__name__"] | ||
regex = "(.+)" | ||
replacement = env("NAME") | ||
target_label = "test_name" | ||
} | ||
|
||
rule { | ||
source_labels = ["__name__"] | ||
action = "keep" | ||
regex = "(agent_wal_storage_active_series|agent_resources_process_cpu_seconds_total|go_memstats_alloc_bytes|go_gc_duration_seconds_sum|go_gc_duration_seconds_count|loki_source_file_files_active_total|loki_write_encoded_bytes_total|loki_write_sent_bytes_total|loki_source_file_file_bytes_total)" | ||
} | ||
|
||
forward_to = [prometheus.remote_write.agent_stats.receiver] | ||
} | ||
|
||
prometheus.remote_write "agent_stats" { | ||
endpoint { | ||
url = "https://prometheus-us-central1.grafana.net/api/prom/push" | ||
|
||
basic_auth { | ||
username = env("PROM_USERNAME") | ||
password = env("PROM_PASSWORD") | ||
} | ||
} | ||
} | ||
|
||
|
||
local.file_match "logs" { | ||
path_targets = [ | ||
{__path__ = "./data/logs-gen/loki.test.logs.logs/*.log"}, | ||
] | ||
} | ||
|
||
loki.source.file "tmpfiles" { | ||
targets = local.file_match.logs.targets | ||
forward_to = [loki.write.local.receiver] | ||
} | ||
|
||
loki.write "local" { | ||
endpoint { | ||
url = "http://localhost:8888/post" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
go build -o main | ||
|
||
# The logs benchmark is run with the benchmark type (logs) as the first argument and the duration as the second. | ||
# See logs.river and logsgen.river for the agent configuration. | ||
./main logs 1h |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
loki.test.logs "logs" { | ||
number_of_files = 100 | ||
file_churn_percent = .25 | ||
file_refresh = "1m" | ||
writes_per_cadence = 1000 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"net/http" | ||
"os" | ||
"os/exec" | ||
"strconv" | ||
"syscall" | ||
"time" | ||
|
||
"github.com/gorilla/mux" | ||
) | ||
|
||
// main handles creating the benchmark. | ||
func main() { | ||
username := os.Getenv("PROM_USERNAME") | ||
if username == "" { | ||
panic("PROM_USERNAME env must be set") | ||
} | ||
password := os.Getenv("PROM_PASSWORD") | ||
if password == "" { | ||
panic("PROM_PASSWORD env must be set") | ||
} | ||
|
||
// Start the HTTP server, that can swallow requests. | ||
go httpServer() | ||
// Build the agent | ||
buildAgent() | ||
|
||
benchType := os.Args[1] | ||
if benchType == "metrics" { | ||
name := os.Args[2] | ||
allowWal := os.Args[3] | ||
duration := os.Args[4] | ||
discovery := os.Args[5] | ||
allowWalBool, _ := strconv.ParseBool(allowWal) | ||
parsedDuration, _ := time.ParseDuration(duration) | ||
fmt.Println(name, allowWalBool, parsedDuration, discovery) | ||
|
||
startMetricsRun(name, allowWalBool, parsedDuration, discovery) | ||
} else if benchType == "logs" { | ||
duration := os.Args[2] | ||
parsedDuration, _ := time.ParseDuration(duration) | ||
startLogsRun(parsedDuration) | ||
} else { | ||
panic("unknown benchmark type") | ||
} | ||
} | ||
|
||
func startMetricsRun(name string, allowWAL bool, run time.Duration, discovery string) { | ||
_ = os.RemoveAll("./data/normal-data") | ||
_ = os.RemoveAll("./data/test-data") | ||
|
||
allow = allowWAL | ||
_ = os.Setenv("NAME", name) | ||
_ = os.Setenv("ALLOW_WAL", strconv.FormatBool(allowWAL)) | ||
_ = os.Setenv("DISCOVERY", discovery) | ||
|
||
metric := startMetricsAgent() | ||
fmt.Println("starting metric agent") | ||
defer func() { | ||
_ = metric.Process.Kill() | ||
_ = metric.Process.Release() | ||
_ = metric.Wait() | ||
_ = syscall.Kill(-metric.Process.Pid, syscall.SIGKILL) | ||
_ = os.RemoveAll("./data/test-data") | ||
}() | ||
old := startNormalAgent() | ||
fmt.Println("starting normal agent") | ||
|
||
defer func() { | ||
_ = old.Process.Kill() | ||
_ = old.Process.Release() | ||
_ = old.Wait() | ||
_ = syscall.Kill(-old.Process.Pid, syscall.SIGKILL) | ||
_ = os.RemoveAll("./data/normal-data") | ||
}() | ||
time.Sleep(run) | ||
} | ||
|
||
// buildAgent compiles the agent by running `go build ../grafana-agent-flow` in
// its own process group, streaming the build output to this process's stdout
// and stderr. It blocks until the build finishes and panics with the error
// text if the build fails.
func buildAgent() {
	build := exec.Command("go", "build", "../grafana-agent-flow")
	build.Stdout, build.Stderr = os.Stdout, os.Stderr
	build.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	if err := build.Run(); err != nil {
		panic(err.Error())
	}
}
|
||
// startNormalAgent launches ./grafana-agent-flow with ./normal.river, storing
// its data under ./data/normal-data and serving HTTP on 127.0.0.1:12346. The
// child runs in its own process group; its stdout and stderr are left unset.
// It panics with the error text if the process cannot be started.
func startNormalAgent() *exec.Cmd {
	agent := exec.Command(
		"./grafana-agent-flow",
		"run",
		"./normal.river",
		"--storage.path=./data/normal-data",
		"--server.http.listen-addr=127.0.0.1:12346",
	)
	agent.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	if err := agent.Start(); err != nil {
		panic(err.Error())
	}
	return agent
}
|
||
// startMetricsAgent launches ./grafana-agent-flow with ./test.river, storing
// its data under ./data/test-data and serving HTTP on 127.0.0.1:9001. The
// child runs in its own process group; its stdout and stderr are left unset.
// It panics with the error text if the process cannot be started.
func startMetricsAgent() *exec.Cmd {
	agent := exec.Command(
		"./grafana-agent-flow",
		"run",
		"./test.river",
		"--storage.path=./data/test-data",
		"--server.http.listen-addr=127.0.0.1:9001",
	)
	agent.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	if err := agent.Start(); err != nil {
		panic(err.Error())
	}
	return agent
}
|
||
// allow decides how handlePost answers: while true the handler returns without
// writing a status, while false it responds with 500. It is assigned by
// startMetricsRun and startLogsRun and toggled by the /allow and /block routes.
// NOTE(review): it is written and read from different goroutines without
// synchronization — confirm whether that matters for the benchmark.
var allow = false | ||
|
||
func httpServer() { | ||
r := mux.NewRouter() | ||
r.HandleFunc("/post", func(w http.ResponseWriter, r *http.Request) { | ||
handlePost(w, r) | ||
}) | ||
r.HandleFunc("/allow", func(w http.ResponseWriter, r *http.Request) { | ||
println("allowing") | ||
allow = true | ||
}) | ||
r.HandleFunc("/block", func(w http.ResponseWriter, r *http.Request) { | ||
println("blocking") | ||
allow = false | ||
}) | ||
http.Handle("/", r) | ||
println("Starting server") | ||
err := http.ListenAndServe(":8888", nil) | ||
if err != nil { | ||
println(err) | ||
} | ||
} | ||
|
||
func handlePost(w http.ResponseWriter, _ *http.Request) { | ||
if allow { | ||
return | ||
} else { | ||
println("returning 500") | ||
w.WriteHeader(500) | ||
} | ||
} |
Oops, something went wrong.