feat(minor): Add support for metricP90AbsoluteThresholds export format (#180)

This provides a better out-of-the-box experience for absolute checks and is geared towards CI checking of e.g. malloc/syscall deviations. The thresholds can, for example, be put in a "Thresholds" directory.

To generate a new baseline, run, for example:
```bash
swift package --allow-writing-to-package-directory benchmark --format metricP90AbsoluteThresholds --path Thresholds/
```

To run a check against the saved thresholds, run:
```bash
swift package benchmark baseline check --check-absolute-path /relative/or/absolute/path/to/Thresholds
```
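
The export writes one file per benchmark, named `<target>.<benchmarkName>.p90.json` (see `BenchmarkTool+Export.swift` below). As a rough illustration — the metric keys and values here are made up, not taken from an actual run — such a file might look like:
```json
{
  "mallocCountTotal" : 1001,
  "syscalls" : 5
}
```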
hassila authored Aug 24, 2023
1 parent 6e0a077 commit 3549c2b
Showing 18 changed files with 238 additions and 27 deletions.
2 changes: 1 addition & 1 deletion Benchmarks/Basic/Basic+SetupTeardown.swift
@@ -1,5 +1,5 @@
//
// File.swift
// Basic+SetupTeardown.swift
//
//
// Created by Joakim Hassila on 2023-04-21.
33 changes: 33 additions & 0 deletions Benchmarks/P90AbsoluteThresholds/P90AbsoluteThresholds.swift
@@ -0,0 +1,33 @@
//
// Copyright (c) 2023 Ordo One AB
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0

import Benchmark
import Foundation

let benchmarks = {
Benchmark.defaultConfiguration = .init(metrics: [.mallocCountTotal, .syscalls],
warmupIterations: 1,
scalingFactor: .kilo,
maxDuration: .seconds(2),
maxIterations: .kilo(100))

Benchmark("P90Date") { benchmark in
for _ in benchmark.scaledIterations {
blackHole(Foundation.Date())
}
}

Benchmark("P90Malloc") { benchmark in
for _ in benchmark.scaledIterations {
var array: [Int] = []
array.append(contentsOf: 0 ... 1_000)
blackHole(array)
}
}
}
14 changes: 14 additions & 0 deletions Package.swift
@@ -203,3 +203,17 @@ package.targets += [
]
),
]

// Benchmark testing loading of p90 absolute thresholds
package.targets += [
.executableTarget(
name: "P90AbsoluteThresholdsBenchmark",
dependencies: [
"Benchmark",
],
path: "Benchmarks/P90AbsoluteThresholds",
plugins: [
"BenchmarkPlugin"
]
),
]
13 changes: 12 additions & 1 deletion Plugins/BenchmarkCommandPlugin/BenchmarkCommandPlugin.swift
@@ -40,7 +40,8 @@ import PackagePlugin
let pathSpecified = argumentExtractor.extractOption(named: "path") // export path
let quietRunning = argumentExtractor.extractFlag(named: "quiet")
let noProgress = argumentExtractor.extractFlag(named: "no-progress")
let checkAbsoluteThresholds = argumentExtractor.extractFlag(named: "check-absolute")
let checkAbsoluteThresholdsPath = argumentExtractor.extractOption(named: "check-absolute-path")
let checkAbsoluteThresholds = checkAbsoluteThresholdsPath.count > 0 ? 1 : argumentExtractor.extractFlag(named: "check-absolute")
let groupingToUse = argumentExtractor.extractOption(named: "grouping")
let metricsToUse = argumentExtractor.extractOption(named: "metric")
let debug = argumentExtractor.extractFlag(named: "debug")
@@ -218,7 +219,14 @@ import PackagePlugin
}

if checkAbsoluteThresholds > 0 {
if checkAbsoluteThresholdsPath.count > 1 {
print("Only a single path for thresholds can be specified, got \(checkAbsoluteThresholdsPath.count).")
return
}
args.append(contentsOf: ["--check-absolute-thresholds"])
if let path = checkAbsoluteThresholdsPath.first {
args.append(contentsOf: ["--check-absolute-thresholds-path", path])
}
}

if scale > 0 {
@@ -330,6 +338,9 @@ import PackagePlugin
throw MyError.benchmarkCrashed
case .thresholdViolation:
throw MyError.benchmarkThresholdDeviation
case .benchmarkJobFailed:
print("One benchmark job failed during runtime, continuing with remaining.")
break
}
} else {
print("One or more benchmarks returned an unexpected return code \(status)")
19 changes: 8 additions & 11 deletions Plugins/BenchmarkCommandPlugin/BenchmarkPlugin+Help.swift
@@ -12,8 +12,7 @@ let help =
"""
OVERVIEW: Run benchmarks or update, compare or check performance baselines
Performs operations on benchmarks (running or listing them), as well as storing, comparing baselines as well as checking them for threshold
deviations.
Performs operations on benchmarks (running or listing them), as well as storing, comparing baselines as well as checking them for threshold deviations.
The init command will create a skeleton benchmark suite for you and add it to Package.swift.
@@ -39,22 +38,17 @@ let help =
swift package benchmark help
ARGUMENTS:
<command> The benchmark command to perform, one of: ["run", "list", "baseline", "help", "init"]. If not specified, 'run' is
implied.
<command> The benchmark command to perform, one of: ["run", "list", "baseline", "help", "init"]. If not specified, 'run' is implied.
OPTIONS:
--filter <filter> Benchmarks matching the regexp filter that should be run
--skip <skip> Benchmarks matching the regexp filter that should be skipped
--target <target> Benchmark targets matching the regexp filter that should be run
--skip-target <skip-target>
Benchmark targets matching the regexp filter that should be skipped
--format <format> The output format to use, one of: ["text", "markdown", "influx", "jmh", "histogramEncoded", "histogram",
"histogramSamples", "histogramPercentiles"], default is 'text'
--metric <metric> Specifies that the benchmark run should use one or more specific metrics instead of the ones defined by the
benchmarks, valid values are: ["cpuUser", "cpuSystem", "cpuTotal", "wallClock", "throughput", "peakMemoryResident",
"peakMemoryVirtual", "mallocCountSmall", "mallocCountLarge", "mallocCountTotal", "allocatedResidentMemory",
"memoryLeaked", "syscalls", "contextSwitches", "threads", "threadsRunning", "readSyscalls", "writeSyscalls",
"readBytesLogical", "writeBytesLogical", "readBytesPhysical", "writeBytesPhysical", "retainCount", "releaseCount",
--format <format> The output format to use, one of: ["text", "markdown", "influx", "jmh", "histogramEncoded", "histogram", "histogramSamples", "histogramPercentiles", "metricP90AbsoluteThresholds"], default is 'text'
--metric <metric> Specifies that the benchmark run should use one or more specific metrics instead of the ones defined by the benchmarks, valid values are: ["cpuUser", "cpuSystem", "cpuTotal", "wallClock", "throughput", "peakMemoryResident", "peakMemoryVirtual", "mallocCountSmall", "mallocCountLarge",
"mallocCountTotal", "allocatedResidentMemory", "memoryLeaked", "syscalls", "contextSwitches", "threads", "threadsRunning", "readSyscalls", "writeSyscalls", "readBytesLogical", "writeBytesLogical", "readBytesPhysical", "writeBytesPhysical", "retainCount", "releaseCount",
"retainReleaseDelta", "custom"]
--path <path> The path where exported data is stored, default is the current directory (".").
--quiet Specifies that output should be suppressed (useful for if you just want to check return code)
@@ -67,6 +61,9 @@ let help =
a specific check against a given absolute reference.).
If this is enabled, zero or one baselines should be specified for the check operation.
By default, thresholds are checked comparing two baselines, or a baseline and a benchmark run.
--check-absolute-thresholds-path <check-absolute-thresholds-path>
The path from which p90 thresholds will be loaded for absolute threshold checks.
This implicitly sets --check-absolute to true as well.
--no-progress Specifies that benchmark progress information should not be displayed
--grouping <grouping> The grouping to use, one of: ["metric", "benchmark"]. default is 'benchmark'
-h, --help Show help information.
3 changes: 3 additions & 0 deletions Plugins/BenchmarkCommandPlugin/Command+Helpers.swift
@@ -39,6 +39,8 @@ enum OutputFormat: String, CaseIterable {
case histogramSamples
/// The percentile values between (0-100) in TSV format for processing by external tools (e.g. Youplot)
case histogramPercentiles
/// The p90 percentile values per metric as a `[BenchmarkMetric: BenchmarkThresholds]` in JSON format, suitable for static thresholds
case metricP90AbsoluteThresholds
}

enum Grouping: String, CaseIterable {
@@ -59,4 +61,5 @@ enum ExitCode: Int32 {
case success = 0
case genericFailure = 1
case thresholdViolation = 2
case benchmarkJobFailed = 3
}
7 changes: 7 additions & 0 deletions Plugins/BenchmarkHelpGenerator/BenchmarkHelpGenerator.swift
@@ -122,6 +122,13 @@ struct Benchmark: AsyncParsableCommand {
""")
var checkAbsoluteThresholds = false

@Option(name: .long, help:
"""
The path from which p90 thresholds will be loaded for absolute threshold checks.
This implicitly sets --check-absolute to true as well.
""")
var checkAbsoluteThresholdsPath: String?

@Flag(name: .long, help: "Specifies that benchmark progress information should not be displayed")
var noProgress: Int

3 changes: 3 additions & 0 deletions Plugins/BenchmarkHelpGenerator/Command+Helpers.swift
@@ -39,6 +39,8 @@ enum OutputFormat: String, CaseIterable {
case histogramSamples
/// The percentile values between (0-100) in TSV format for processing by external tools (e.g. Youplot)
case histogramPercentiles
/// The p90 percentile values per metric as a `[BenchmarkMetric: BenchmarkThresholds]` in JSON format, suitable for static thresholds
case metricP90AbsoluteThresholds
}

enum Grouping: String, CaseIterable {
@@ -59,4 +61,5 @@ enum ExitCode: Int32 {
case success = 0
case genericFailure = 1
case thresholdViolation = 2
case benchmarkJobFailed = 3
}
20 changes: 20 additions & 0 deletions Plugins/BenchmarkTool/BenchmarkTool+Export.swift
@@ -11,6 +11,7 @@
import Benchmark
import DateTime
import ExtrasJSON
import Foundation
import SystemPackage

#if canImport(Darwin)
@@ -219,6 +220,25 @@ extension BenchmarkTool {
outputString = ""
}
}
case .metricP90AbsoluteThresholds:
try baseline.results.forEach { key, results in
let jsonEncoder = JSONEncoder()
jsonEncoder.outputFormatting = [.prettyPrinted, .sortedKeys]

var outputResults : [String : BenchmarkThresholds.AbsoluteThreshold] = [:]
results.forEach { values in
outputResults[values.metric.rawDescription] = Int(values.statistics.histogram.valueAtPercentile(90.0))
}

let jsonResultData = try jsonEncoder.encode(outputResults)

if let stringOutput = String(data: jsonResultData, encoding: .utf8) {
try write(exportData: stringOutput,
fileName: cleanupStringForShellSafety("\(key.target).\(key.name).p90.json"))
} else {
print("Failed to encode json for \(outputResults)")
}
}
}
}

20 changes: 19 additions & 1 deletion Plugins/BenchmarkTool/BenchmarkTool+Operations.swift
@@ -146,7 +146,25 @@ extension BenchmarkTool {
print("Can only do threshold violation checks for exactly 1 benchmark baseline, got: \(benchmarkBaselines.count) baselines.")
return
}

if let benchmarkPath = checkAbsoluteThresholdsPath { // load statically defined thresholds for .p90
benchmarks.forEach { benchmark in
let thresholds = BenchmarkTool.makeBenchmarkThresholds(path: benchmarkPath,
moduleName: benchmark.target,
benchmarkName: benchmark.name)
var transformed: [BenchmarkMetric : BenchmarkThresholds] = [:]
if let thresholds {
thresholds.forEach { key, value in
if let metric = BenchmarkMetric(argument: key) {
let absoluteThreshold : BenchmarkThresholds.AbsoluteThresholds = [.p90 : value]
transformed[metric] = BenchmarkThresholds(absolute: absoluteThreshold)
}
}
if transformed.isEmpty == false {
benchmark.configuration.thresholds = transformed
}
}
}
}
print("")
let currentBaseline = benchmarkBaselines[0]
let baselineName = baseline[0]
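
For context, a minimal hand-written sketch of what the loader above produces for one benchmark — metric names and p90 values are illustrative, and the API calls mirror the lines in this diff:
```swift
import Benchmark

// Illustrative only (not part of this commit): the hand-written equivalent of the
// dictionary that the loader builds from a Thresholds/<target>.<benchmark>.p90.json file.
let staticThresholds: [BenchmarkMetric: BenchmarkThresholds] = [
    .mallocCountTotal: BenchmarkThresholds(absolute: [.p90: 1_001]),
    .syscalls: BenchmarkThresholds(absolute: [.p90: 5]),
]

// A benchmark's configuration can then be pointed at them, just as the loader does:
// benchmark.configuration.thresholds = staticThresholds
```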
@@ -0,0 +1,82 @@
//
// Copyright (c) 2023 Ordo One AB.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
//
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//

import Benchmark
import Foundation
import SystemPackage

#if canImport(Darwin)
import Darwin
#elseif canImport(Glibc)
import Glibc
#else
#error("Unsupported Platform")
#endif

extension BenchmarkTool {
/// `makeBenchmarkThresholds` is a convenience function for reading p90 static thresholds that previously have been exported with `metricP90AbsoluteThresholds`
///
/// - Parameters:
/// - path: The path where the `Thresholds` directory should be located, containing static thresholds files using the naming pattern:
/// `moduleName.benchmarkName.p90.json`
/// - moduleName: The name of the benchmark module, can be extracted in the benchmark using:
/// `String("\(#fileID)".prefix(while: { $0 != "/" }))`
/// - benchmarkName: The name of the benchmark
/// - Returns: A dictionary with static benchmark thresholds per metric or nil if the file could not be found or read
static func makeBenchmarkThresholds(path: String,
moduleName: String,
benchmarkName: String) -> [String : BenchmarkThresholds.AbsoluteThreshold]? {
var path = FilePath(path)
if path.isAbsolute {
path.append("\(moduleName).\(benchmarkName).p90.json")
} else {
var cwdPath = FilePath(FileManager.default.currentDirectoryPath)
cwdPath.append(path.components)
cwdPath.append("\(moduleName).\(benchmarkName).p90.json")
path = cwdPath
}

var p90Thresholds: [String : BenchmarkThresholds.AbsoluteThreshold]?

do {
let fileDescriptor = try FileDescriptor.open(path, .readOnly, options: [], permissions: .ownerRead)

do {
try fileDescriptor.closeAfter {
do {
var readBytes = [UInt8]()
let bufferSize = 16 * 1_024 * 1_024

while true {
let nextBytes = try [UInt8](unsafeUninitializedCapacity: bufferSize) { buf, count in
count = try fileDescriptor.read(into: UnsafeMutableRawBufferPointer(buf))
}
if nextBytes.isEmpty {
break
}
readBytes.append(contentsOf: nextBytes)
}

p90Thresholds = try JSONDecoder().decode([String : BenchmarkThresholds.AbsoluteThreshold].self, from: Data(readBytes))
} catch {
print("Failed to read file at \(path) [\(error)] \(Errno(rawValue: errno).description)")
}
}
} catch {
print("Failed to close fd for \(path) after reading.")
}
} catch {
if errno != ENOENT { // file not found is ok, e.g. no thresholds found, then silently return nil
print("Failed to open file \(path), errno = [\(errno)] \(Errno(rawValue: errno).description)")
}
}
return p90Thresholds
}
}
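
A rough usage sketch of the helper above, assuming the `Thresholds/` directory layout from the commit message; the target and benchmark names are taken from this PR's `P90AbsoluteThresholds` suite and the commented return value is illustrative:
```swift
// Resolves Thresholds/P90AbsoluteThresholdsBenchmark.P90Malloc.p90.json (relative paths are
// resolved against the current working directory) and decodes it into per-metric p90 values.
let p90Thresholds = BenchmarkTool.makeBenchmarkThresholds(path: "Thresholds/",
                                                          moduleName: "P90AbsoluteThresholdsBenchmark",
                                                          benchmarkName: "P90Malloc")
// nil if the file does not exist or cannot be read; otherwise e.g. ["mallocCountTotal": 1001, "syscalls": 5]
```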
7 changes: 7 additions & 0 deletions Plugins/BenchmarkTool/BenchmarkTool.swift
@@ -81,6 +81,13 @@ struct BenchmarkTool: AsyncParsableCommand {
""")
var checkAbsoluteThresholds = false

@Option(name: .long, help:
"""
The path from which p90 thresholds will be loaded for absolute threshold checks.
This implicitly sets --check-absolute to true as well.
""")
var checkAbsoluteThresholdsPath: String?

@Option(name: .long, help: "The named baseline(s) we should display, update, delete or compare with")
var baseline: [String] = []

2 changes: 2 additions & 0 deletions Plugins/BenchmarkTool/Command+Helpers.swift
@@ -39,6 +39,8 @@ enum OutputFormat: String, CaseIterable {
case histogramSamples
/// The percentile values between (0-100) in TSV format for processing by external tools (e.g. Youplot)
case histogramPercentiles
/// The p90 percentile values per metric as a `[BenchmarkMetric: BenchmarkThresholds]` in JSON format, suitable for static thresholds
case metricP90AbsoluteThresholds
}

enum Grouping: String, CaseIterable {
9 changes: 9 additions & 0 deletions Sources/Benchmark/Documentation.docc/ComparingBenchmarksCI.md
@@ -23,6 +23,15 @@ The following will run all benchmarks and compare them against a fixed absolute
```bash
swift package benchmark baseline check --check-absolute
```
This is typically used in conjunction with the built-in support for exporting absolute p90 baselines using the `metricP90AbsoluteThresholds` export format.
```bash
swift package --allow-writing-to-package-directory benchmark --filter "P90.*" --format metricP90AbsoluteThresholds --path Thresholds/
```

These baselines can then be checked with:
```bash
swift package benchmark baseline check --check-absolute-path /relative/or/absolute/path/to/Thresholds
```

### Example GitHub CI workflow comparing against a baseline
