Improvements to DynamicPPLBenchmarks #346

Draft · wants to merge 33 commits into base: main

Commits (33)
57b5d47  bigboy update to benchmarks (torfjelde, Aug 2, 2021)
e7c0a76  Merge branch 'master' into tor/benchmark-update (torfjelde, Aug 19, 2021)
60ec2c8  Merge branch 'master' into tor/benchmark-update (torfjelde, Sep 8, 2021)
eb1b83c  Merge branch 'master' into tor/benchmark-update (torfjelde, Nov 6, 2021)
d8afa71  Merge branch 'master' into tor/benchmark-update (torfjelde, Nov 6, 2021)
5bb48d2  make models return random variables as NamedTuple as it can be useful… (torfjelde, Dec 2, 2021)
02484cf  add benchmarking of evaluation with SimpleVarInfo with NamedTuple (torfjelde, Dec 2, 2021)
5c59769  added some information about the execution environment (torfjelde, Dec 3, 2021)
f1f1381  added judgementtable_single (torfjelde, Dec 3, 2021)
a48553a  added benchmarking of SimpleVarInfo, if present (torfjelde, Dec 3, 2021)
f2dc062  Merge branch 'master' into tor/benchmark-update (torfjelde, Dec 3, 2021)
fa675de  added ComponentArrays benchmarking for SimpleVarInfo (torfjelde, Dec 5, 2021)
3962da2  Merge branch 'master' into tor/benchmark-update (yebai, Aug 29, 2022)
53dc571  Merge branch 'master' into tor/benchmark-update (yebai, Nov 2, 2022)
f5705d5  Merge branch 'master' into tor/benchmark-update (torfjelde, Nov 7, 2022)
7f569f7  formatting (torfjelde, Nov 7, 2022)
4a06150  Merge branch 'master' into tor/benchmark-update (yebai, Feb 2, 2023)
a1cc6bf  Apply suggestions from code review (yebai, Feb 2, 2023)
3e7e200  Update benchmarks/benchmarks.jmd (yebai, Feb 2, 2023)
c867ae8  Merge branch 'master' into tor/benchmark-update (yebai, Jul 4, 2023)
96f120b  merged main into this one (shravanngoswamii, Dec 19, 2024)
0460b64  Benchmarking CI (shravanngoswamii, Dec 19, 2024)
a8541b5  Julia script for benchmarking on top of current setup (shravanngoswamii, Feb 1, 2025)
0291c2f  keep old results for reference (shravanngoswamii, Feb 1, 2025)
6f255d1  Merge branch 'master' of https://github.com/TuringLang/DynamicPPL.jl … (shravanngoswamii, Feb 1, 2025)
3b5e448  updated benchmarking setup (shravanngoswamii, Feb 20, 2025)
1e61025  Merge branch 'master' of https://github.com/TuringLang/DynamicPPL.jl … (shravanngoswamii, Feb 20, 2025)
640aa45  applied suggested changes (shravanngoswamii, Feb 27, 2025)
3bdbe40  Merge branch 'master' of https://github.com/TuringLang/DynamicPPL.jl … (shravanngoswamii, Feb 27, 2025)
d8fd05c  updated benchmarks/README.md (shravanngoswamii, Feb 27, 2025)
c34e489  setup benchmarking CI (shravanngoswamii, Feb 27, 2025)
1d1b11e  Merge remote-tracking branch 'origin/main' into tor/benchmark-update (mhauru, Mar 3, 2025)
ad4175a  Update benchmark models (#826) (mhauru, Mar 3, 2025)
66 changes: 66 additions & 0 deletions .github/workflows/Benchmarking.yml
@@ -0,0 +1,66 @@
name: Benchmarking

on:
  pull_request:

jobs:
  benchmarks:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Set up Julia
        uses: julia-actions/setup-julia@v2
        with:
          version: '1'

      - name: Install Dependencies
        run: julia --project=benchmarks/ -e 'using Pkg; Pkg.instantiate()'

      - name: Run Benchmarks
        id: run_benchmarks
        run: |
          # Capture version info into a variable, print it, and set it as an env var for later steps
          version_info=$(julia -e 'using InteractiveUtils; versioninfo()')
          echo "$version_info"
          echo "VERSION_INFO<<EOF" >> $GITHUB_ENV
          echo "$version_info" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          # Capture benchmark output into a variable
          echo "Running Benchmarks..."
          benchmark_output=$(julia --project=benchmarks benchmarks/benchmarks.jl)

          # Print benchmark results directly to the workflow log
          echo "Benchmark Results:"
          echo "$benchmark_output"

          # Set the benchmark output as an env var for later steps
          echo "BENCHMARK_OUTPUT<<EOF" >> $GITHUB_ENV
          echo "$benchmark_output" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

      - name: Find Existing Comment
        uses: peter-evans/find-comment@v3
        id: find_comment
        with:
          issue-number: ${{ github.event.pull_request.number }}
          comment-author: github-actions[bot]

      - name: Post Benchmark Results as PR Comment
        uses: peter-evans/create-or-update-comment@v4
        with:
          issue-number: ${{ github.event.pull_request.number }}
          body: |
            ## Computer Information
            ```
            ${{ env.VERSION_INFO }}
            ```
            ## Benchmark Report
            ${{ env.BENCHMARK_OUTPUT }}
          comment-id: ${{ steps.find_comment.outputs.comment-id }}
          edit-mode: replace
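For local reproduction, the dependency-install and benchmark-run steps above boil down to the following Julia session. This is a minimal sketch, assuming the repository root is the current working directory; `include` of the script stands in for the `julia --project=benchmarks benchmarks/benchmarks.jl` invocation used in CI.

```julia
# Sketch: reproduce the CI steps from a local Julia REPL.
# Assumes the current working directory is the repository root.
using Pkg

Pkg.activate("benchmarks")   # same environment as `--project=benchmarks/`
Pkg.instantiate()            # install the pinned benchmark dependencies

# Runs the suite and prints the markdown results table to stdout,
# which the workflow above captures into BENCHMARK_OUTPUT.
include(joinpath("benchmarks", "benchmarks.jl"))
```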
8 changes: 3 additions & 5 deletions benchmarks/Project.toml
@@ -4,10 +4,8 @@ version = "0.1.0"
 
 [deps]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
-DiffUtils = "8294860b-85a6-42f8-8c35-d911f667b5f6"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
-LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433"
-Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-Weave = "44d3d7a6-8a23-5bf8-98c5-b353f8df5ec9"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
+TuringBenchmarking = "0db1332d-5c25-4deb-809f-459bc696f94f"
28 changes: 3 additions & 25 deletions benchmarks/README.md
@@ -1,27 +1,5 @@
-To run the benchmarks, simply do:
+To run the benchmarks, simply do this from the root directory of the repository:
 
 ```sh
-julia --project -e 'using DynamicPPLBenchmarks; weave_benchmarks();'
-```
-
-```julia
-julia> @doc weave_benchmarks
-weave_benchmarks(input="benchmarks.jmd"; kwargs...)
-
-Weave benchmarks present in benchmarks.jmd into a single file.
-
-Keyword arguments
-≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡
-
-• benchmarkbody: JMD-file to be rendered for each model.
-
-• include_commit_id=false: specify whether to include commit-id in the default name.
-
-• name: the name of directory in results/ to use as output directory.
-
-• name_old=nothing: if specified, comparisons of current run vs. the run pinted to by name_old will be included in the generated document.
-
-• include_typed_code=false: if true, output of code_typed for the evaluator of the model will be included in the weaved document.
-
-• Rest of the passed kwargs will be passed on to Weave.weave.
-```
+julia --project=benchmarks benchmarks/benchmarks.jl
+```
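The Weave-based pipeline is gone; everything now goes through `benchmarks/benchmarks.jl` (shown in full below). To benchmark a single model/VarInfo/AD combination interactively, the same helpers can be called directly. A minimal sketch, assuming the `make_suite(model, varinfo_choice, adbackend)` signature and the suite layout used in that script:

```julia
using DynamicPPLBenchmarks: Models, make_suite
using BenchmarkTools: median, run

# One combination: typed VarInfo with ForwardDiff, as in the script below.
model = Models.simple_assume_observe(randn())
suite = make_suite(model, :typed, :forwarddiff)
results = run(suite)

# "standard" holds unlinked timings; linked runs live under "linked".
println(median(results["evaluation"]["standard"]).time)
```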
49 changes: 0 additions & 49 deletions benchmarks/benchmark_body.jmd

This file was deleted.

108 changes: 108 additions & 0 deletions benchmarks/benchmarks.jl
@@ -0,0 +1,108 @@
using DynamicPPLBenchmarks: Models, make_suite
using BenchmarkTools: @benchmark, median, run
using PrettyTables: PrettyTables, ft_printf
using Random: seed!

seed!(23)

# Create DynamicPPL.Model instances to run benchmarks on.
smorgasbord_instance = Models.smorgasbord(randn(100), randn(100))
loop_univariate1k, multivariate1k = begin
    data_1k = randn(1_000)
    loop = Models.loop_univariate(length(data_1k)) | (; o=data_1k)
    multi = Models.multivariate(length(data_1k)) | (; o=data_1k)
    loop, multi
end
loop_univariate10k, multivariate10k = begin
    data_10k = randn(10_000)
    loop = Models.loop_univariate(length(data_10k)) | (; o=data_10k)
    multi = Models.multivariate(length(data_10k)) | (; o=data_10k)
    loop, multi
end
lda_instance = begin
    w = [1, 2, 3, 2, 1, 1]
    d = [1, 1, 1, 2, 2, 2]
    Models.lda(2, d, w)
end

# Specify the combinations to test:
# (Model Name, model instance, VarInfo choice, AD backend, linked)
chosen_combinations = [
    (
        "Simple assume observe",
        Models.simple_assume_observe(randn()),
        :typed,
        :forwarddiff,
        false,
    ),
    ("Smorgasbord", smorgasbord_instance, :typed, :forwarddiff, false),
    ("Smorgasbord", smorgasbord_instance, :simple_namedtuple, :forwarddiff, true),
    ("Smorgasbord", smorgasbord_instance, :untyped, :forwarddiff, true),
    ("Smorgasbord", smorgasbord_instance, :simple_dict, :forwarddiff, true),
    ("Smorgasbord", smorgasbord_instance, :typed, :reversediff, true),
    # TODO(mhauru) Add Mooncake once TuringBenchmarking.jl supports it. Consider changing
    # all the below :reversediffs to :mooncakes too.
    #("Smorgasbord", smorgasbord_instance, :typed, :mooncake, true),
    ("Loop univariate 1k", loop_univariate1k, :typed, :reversediff, true),
    ("Multivariate 1k", multivariate1k, :typed, :reversediff, true),
    ("Loop univariate 10k", loop_univariate10k, :typed, :reversediff, true),
    ("Multivariate 10k", multivariate10k, :typed, :reversediff, true),
    ("Dynamic", Models.dynamic(), :typed, :reversediff, true),
    ("Submodel", Models.parent(randn()), :typed, :reversediff, true),
    ("LDA", lda_instance, :typed, :reversediff, true),
]

# Time running a model-like function that does not use DynamicPPL, as a reference point.
# Eval timings will be relative to this.
reference_time = begin
    obs = randn()
    median(@benchmark Models.simple_assume_observe_non_model(obs)).time
end

results_table = Tuple{String,String,String,Bool,Float64,Float64}[]

for (model_name, model, varinfo_choice, adbackend, islinked) in chosen_combinations
    suite = make_suite(model, varinfo_choice, adbackend)
    results = run(suite)
    result_key = islinked ? "linked" : "standard"

    eval_time = median(results["evaluation"][result_key]).time
    relative_eval_time = eval_time / reference_time

    grad_group = results["gradient"]
    if isempty(grad_group)
        relative_ad_eval_time = NaN
    else
        grad_backend_key = first(keys(grad_group))
        ad_eval_time = median(grad_group[grad_backend_key][result_key]).time
        relative_ad_eval_time = ad_eval_time / eval_time
    end

    push!(
        results_table,
        (
            model_name,
            string(adbackend),
            string(varinfo_choice),
            islinked,
            relative_eval_time,
            relative_ad_eval_time,
        ),
    )
end

table_matrix = hcat(Iterators.map(collect, zip(results_table...))...)
header = [
    "Model",
    "AD Backend",
    "VarInfo Type",
    "Linked",
    "Eval Time / Ref Time",
    "AD Time / Eval Time",
]
PrettyTables.pretty_table(
    table_matrix;
    header=header,
    tf=PrettyTables.tf_markdown,
    formatters=ft_printf("%.1f", [5, 6]),
)
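Extending the sweep only requires appending another `(name, model, varinfo, adbackend, linked)` tuple to `chosen_combinations` before the loop runs; for example (a hypothetical extra entry, reusing a model instance defined above):

```julia
# Hypothetical extra combination: untyped VarInfo with ReverseDiff, linked.
push!(
    chosen_combinations,
    ("Smorgasbord", smorgasbord_instance, :untyped, :reversediff, true),
)
```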