Turning latency/bandwidth into simpler (run+analyze) commands (#21)
zveinn authored Nov 13, 2024
1 parent 5d46e3d commit 8f693ec
Showing 10 changed files with 430 additions and 117 deletions.
153 changes: 104 additions & 49 deletions README.md
@@ -33,11 +33,10 @@ WARNING: do not expose `--address` to the internet
<b>NOTE: if the `--address` is not the same as the external IP address used for communication between servers, you need to set `--real-ip`; otherwise the server will report internal IPs in the stats and may end up running the test against itself, producing invalid results.</b>
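
For example, a server listening on all interfaces but reachable by the other servers on an external address might be started like this (the addresses below are placeholders):
```bash
./hperf server --address 0.0.0.0:6000 --real-ip 203.0.113.10 --storage-path /tmp/hperf/
```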

### The listen command
Hperf can run tests without a specific `client` needing to be constantly connected. Once the `client` has started a test, the `client` can exit without affecting the test.

Any `client` can hook into a running test at runtime using the `--id` of the test.
There can even be multiple `clients` listening to the same test.

Example:
```bash
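# Illustrative only: attach a client to an already running test by its ID.
# The exact flag set for `listen` is an assumption based on the other commands shown here.
./hperf listen --hosts file:./hosts --id my-test-id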
```

@@ -97,75 +96,131 @@ NOTE: Be careful not to re-use the ID's if you care about fetching results at a

```bash
# analyze test results
./hperf analyze --file /tmp/test.out

# analyze test results with full print output
./hperf analyze --file /tmp/test.out --print-stats --print-errors

# Generate a .csv file from a .json test file
./hperf csv --file /tmp/test.out
```

## Analysis
The analyze command will print statistics for the 10th and 90th percentiles and all data points in between. Additionally, you can use the `--print-stats` and `--print-errors` flags for more verbose output.

The analysis will show:
- 10th percentile: total, low, average, high
- in between: total, low, average, high
- 90th percentile: total, low, average, high

## Statistics
- Payload Roundtrip (RMS high/low):
  - Payload transfer time (microseconds)
- Time to first byte (TTFB high/low):
  - The time (microseconds) between a request being made and the first byte being received
- Transferred bytes (TX high/low):
  - Bandwidth throughput in KB/s, MB/s, GB/s, etc.
- Transferred bytes (TX total):
  - Total transferred bytes (not per second)
- Request count (#TX):
  - The number of HTTP(S) requests made
- Error Count (#ERR):
  - Number of errors encountered
- Dropped Packets (#Dropped):
  - Number of dropped packets
- Memory (Mem high/low/used):
  - Highest, lowest and current memory usage
- CPU (CPU high/low/used):
  - Highest, lowest and current CPU usage

## Example: 20 second HTTP payload transfer test using multiple sockets
This test will use 12 concurrent workers to send HTTP requests with a payload, with no delay between requests.
Much like a bandwidth test, but it also exercises server behaviour when multiple sockets are being created and closed:
```
$ ./hperf requests --hosts file:./hosts --id http-test-1 --duration 20 --concurrency 12
```
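
Several of the examples pass `--hosts file:./hosts`. The format of that file is not shown in this commit; as an assumption, it is a plain list with one host per line, for example:
```
10.10.10.2
10.10.10.3
10.10.10.4
```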

# Full test scenario (requests, download, analysis and csv export)
## On the servers
```bash
$ ./hperf server --address 0.0.0.0:6000 --real-ip 10.10.10.2 --storage-path /tmp/hperf/
```

## The client

### Run test
```bash
./hperf latency --hosts 10.10.10.{2...3} --port 6000 --duration 10 --id latency-test-1

Test ID: latency-test-1

#ERR  #TX  TX(high)   TX(low)    TX(total)  RMS(high)  RMS(low)  TTFB(high)  TTFB(low)  #Dropped  Mem(high)  Mem(low)  CPU(high)  CPU(low)
0     8    4.00 KB/s  4.00 KB/s  8.00 KB    1          0         0           0          937405    1          1         0          0
0     26   5.00 KB/s  4.00 KB/s  18.00 KB   1          0         0           0          1874810   1          1         0          0
0     73   5.00 KB/s  4.00 KB/s  33.00 KB   1          0         0           0          3317563   1          1         0          0
0     92   5.00 KB/s  4.00 KB/s  38.00 KB   1          0         0           0          3749634   1          1         0          0
0     140  5.00 KB/s  4.00 KB/s  48.00 KB   1          0         0           0          4687048   1          1         0          0
0     198  5.00 KB/s  4.00 KB/s  58.00 KB   1          0         0           0          5624466   1          1         0          0
0     266  5.00 KB/s  4.00 KB/s  68.00 KB   1          0         0           0          6561889   1          1         0          0
0     344  5.00 KB/s  4.00 KB/s  78.00 KB   1          0         0           0          7499312   1          1         0          0
0     432  5.00 KB/s  4.00 KB/s  88.00 KB   9          0         0           0          8436740   1          1         0          0
0     530  5.00 KB/s  4.00 KB/s  98.00 KB   9          0         0           0          9374172   1          1         0          0

Testing finished ..
Analyzing data ..


_____ P99 data points _____

Created   Local       Remote      RMS(high)  RMS(low)  TTFB(high)  TTFB(low)  TX         #TX  #ERR  #Dropped  Mem(used)  CPU(used)
10:30:54  10.10.10.3  10.10.10.3  9          0         0           0          5.00 KB/s  44   0     432076    1          0

Sorting: RMSH
Time: Milliseconds

P10  count  sum  min  avg  max
     18     30   0    1    9
P50  count  sum  min  avg  max
     10     25   0    2    9
P90  count  sum  min  avg  max
     2      18   9    9    9
P99  count  sum  min  avg  max
     1      9    9    9    9

```

### Explaining the stats above.
The first section includes the combined highs/lows and counters for ALL servers being tested.
Each line represents a 1 second stat point.
Here is a breakdown of the individual stats:

- `#ERR`: number of errors (all servers)
- `#TX`: total number of HTTP requests made (all servers)
- `TX(high/low)`: highest and lowest transfer rate seen (single server)
- `RMS(high/low)`: slowest and fastest round-trip latency (single server)
- `TTFB(high/low)`: the time it took to read the first byte (single server)
- `#Dropped`: total number of dropped packets (all servers)
- `Mem(high/low)`: highest and lowest memory usage (single server)
- `CPU(high/low)`: highest and lowest CPU usage (single server)

The next section is a print-out of the `p99` data points.
p99 represents the worst 1% of data points; each row shows the statistics for a single
data point between `Local` and `Remote`.

Finally we have the p10 to p99 breakdown (a small illustrative sketch of this bucketing follows below):
- `Sorting`: the data point field being sorted/used for the breakdown
- `Time`: the time unit being used. The default is milliseconds, but it can be changed to microseconds with `--micro`
- `count`: the total number of data points in this category
- `sum`: the sum of all values in this category
- `min`: the single lowest value in this category
- `avg`: the average of all values in this category
- `max`: the highest value in this category
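
To make the breakdown concrete, here is a small, self-contained Go sketch that buckets a set of latency samples the same way the output above appears to be grouped (every data point at or above the Nth percentile) and prints count/sum/min/avg/max per bucket. This is an illustration only; the bucket boundaries and the math are assumptions inferred from the sample output, not hperf's actual implementation:
```go
package main

import (
	"fmt"
	"sort"
)

// atOrAbove returns the samples at or above the pct-th percentile of an
// already-sorted slice.
func atOrAbove(sorted []int64, pct int) []int64 {
	idx := len(sorted) * pct / 100
	if idx >= len(sorted) {
		idx = len(sorted) - 1
	}
	return sorted[idx:]
}

// describe prints count, sum, min, avg and max for one percentile bucket.
func describe(name string, bucket []int64) {
	var sum int64
	min, max := bucket[0], bucket[0]
	for _, v := range bucket {
		sum += v
		if v < min {
			min = v
		}
		if v > max {
			max = v
		}
	}
	fmt.Printf("%-4s count=%-3d sum=%-4d min=%-3d avg=%-3d max=%-3d\n",
		name, len(bucket), sum, min, sum/int64(len(bucket)), max)
}

func main() {
	// Pretend these are per-interval RMS(high) samples in milliseconds.
	samples := []int64{0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 5, 6, 9, 9}
	sort.Slice(samples, func(i, j int) bool { return samples[i] < samples[j] })

	for _, p := range []int{10, 50, 90, 99} {
		describe(fmt.Sprintf("P%d", p), atOrAbove(samples, p))
	}
}
```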

### Download test
```bash
./hperf download --hosts 10.10.10.{2...3} --port 6000 --id latency-test-1 --file latency-test-1
```

### Analyze test
NOTE: this analysis displays the same output as the final step of the test run above.
```bash
./hperf analyze --file latency-test-1 --print-stats --print-errors
```

### Export csv
```bash
./hperf csv --file latency-test-1
```
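
The exported CSV can be inspected with standard command-line tools, for example (this assumes the export is written as `latency-test-1.csv`; the actual output file name may differ):
```bash
column -s, -t < latency-test-1.csv | less -S
```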

# Random Example tests

## Example: Basic latency testing
This will run a 20 second latency test and analyze+print the results when done
```
$ ./hperf latency --hosts file:./hosts --port [PORT] --duration 20 --print-all
```

## Example: Basic bandwidth testing
This will run a 20 second bandwidth test and print the results when done
```
$ ./hperf bandwidth --hosts file:./hosts --port [PORT] --duration 20 --concurrency 10 --print-all
```

## Example: 20 second HTTP payload transfer test using a stream
This will perform a 20 second bandwidth test with 12 concurrent HTTP streams:
```
$ ./hperf bandwidth --hosts file:./hosts --id http-test-2 --duration 20 --concurrency 12
```

## Example: 6 minute latency test using a 1000 byte buffer, with a 50ms delay between requests
This test will send a single round trip request between servers to test base latency and reachability:
```
$ ./hperf latency --hosts file:./hosts --id http-test-3 --duration 360 --concurrency 1 --requestDelay 50 \
  --bufferSize 1000 --payloadSize 1000
```
68 changes: 64 additions & 4 deletions client/client.go
@@ -406,7 +406,7 @@ func RunTest(ctx context.Context, c shared.Config) (err error) {
}

for i := range responseERR {
fmt.Println(responseERR[i])
PrintErrorString(responseERR[i].Error)
}

if printCount%10 == 1 {
@@ -553,6 +553,66 @@ func DownloadTest(ctx context.Context, c shared.Config) (err error) {
return nil
}

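// AnalyzeBandwidthTest prints every collected data point and any errors for a
// bandwidth (stream) test when --print-all is set; no further aggregation is done here.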
func AnalyzeBandwidthTest(ctx context.Context, c shared.Config) (err error) {
_, cancel := context.WithCancel(ctx)
defer cancel()

if c.PrintAll {
shared.INFO(" Printing all data points ..")
fmt.Println("")

printSliceOfDataPoints(responseDPS, c)

if len(responseERR) > 0 {
fmt.Println(" ____ ERRORS ____")
}
for i := range responseERR {
PrintTError(responseERR[i])
}
if len(responseERR) > 0 {
fmt.Println("")
}
}

if len(responseDPS) == 0 {
fmt.Println("No datapoints found")
return
}

return nil
}

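// AnalyzeLatencyTest optionally prints every collected data point and error,
// then runs the latency analysis over the collected data points.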
func AnalyzeLatencyTest(ctx context.Context, c shared.Config) (err error) {
_, cancel := context.WithCancel(ctx)
defer cancel()

if c.PrintAll {
shared.INFO(" Printing all data points ..")

printSliceOfDataPoints(responseDPS, c)

if len(responseERR) > 0 {
fmt.Println(" ____ ERRORS ____")
}
for i := range responseERR {
PrintTError(responseERR[i])
}
if len(responseERR) > 0 {
fmt.Println("")
}
}
if len(responseDPS) == 0 {
fmt.Println("No datapoints found")
return
}

shared.INFO(" Analyzing data ..")
fmt.Println("")
analyzeLatencyTest(responseDPS, c)

return nil
}

func AnalyzeTest(ctx context.Context, c shared.Config) (err error) {
_, cancel := context.WithCancel(ctx)
defer cancel()
@@ -592,7 +652,7 @@ func AnalyzeTest(ctx context.Context, c shared.Config) (err error) {
dps = shared.HostFilter(c.HostFilter, dps)
}

if c.PrintFull {
if c.PrintStats {
printSliceOfDataPoints(dps, c)
}

@@ -614,9 +674,9 @@
}

switch dps[0].Type {
case shared.LatencyTest:
case shared.RequestTest:
analyzeLatencyTest(dps, c)
case shared.BandwidthTest:
case shared.StreamTest:
fmt.Println("")
fmt.Println("Detailed analysis for bandwidth testing is in development")
}
16 changes: 8 additions & 8 deletions client/table.go
@@ -191,9 +191,9 @@ func PrintColumns(style lipgloss.Style, columns ...column) {

func printDataPointHeaders(t shared.TestType) {
switch t {
case shared.BandwidthTest:
case shared.StreamTest:
printHeader(BandwidthHeaders)
case shared.LatencyTest:
case shared.RequestTest:
printHeader(LatencyHeaders)
default:
printHeader(FullDataPointHeaders)
@@ -202,17 +202,17 @@

func printRealTimeHeaders(t shared.TestType) {
switch t {
case shared.BandwidthTest:
case shared.StreamTest:
printHeader(RealTimeBandwidthHeaders)
case shared.LatencyTest:
case shared.RequestTest:
printHeader(RealTimeLatencyHeaders)
default:
}
}

func printRealTimeRow(style lipgloss.Style, entry *shared.TestOutput, t shared.TestType) {
switch t {
case shared.BandwidthTest:
case shared.StreamTest:
PrintColumns(
style,
column{formatInt(int64(entry.ErrCount)), headerSlice[ErrCount].width},
@@ -227,7 +227,7 @@ func printRealTimeRow(style lipgloss.Style, entry *shared.TestOutput, t shared.T
column{formatInt(int64(entry.CL)), headerSlice[CPULow].width},
)
return
case shared.LatencyTest:
case shared.RequestTest:
PrintColumns(
style,
column{formatInt(int64(entry.ErrCount)), headerSlice[ErrCount].width},
@@ -252,7 +252,7 @@ func printRealTimeRow(style lipgloss.Style, entry *shared.TestOutput, t shared.T

func printTableRow(style lipgloss.Style, entry *shared.DP, t shared.TestType) {
switch t {
case shared.BandwidthTest:
case shared.StreamTest:
PrintColumns(
style,
column{entry.Created.Format("15:04:05"), headerSlice[Created].width},
@@ -265,7 +265,7 @@ func printTableRow(style lipgloss.Style, entry *shared.DP, t shared.TestType) {
column{formatInt(int64(entry.CPUUsedPercent)), headerSlice[CPUUsage].width},
)
return
case shared.LatencyTest:
case shared.RequestTest:
PrintColumns(
style,
column{entry.Created.Format("15:04:05"), headerSlice[Created].width},
