diff --git a/.github/workflows/DocCleanup.yml b/.github/workflows/DocCleanup.yml
new file mode 100644
index 0000000..43b39af
--- /dev/null
+++ b/.github/workflows/DocCleanup.yml
@@ -0,0 +1,28 @@
+name: Doc Preview Cleanup
+
+on:
+  pull_request:
+    types: [closed]
+
+jobs:
+  doc-preview-cleanup:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout gh-pages branch
+        uses: actions/checkout@v4
+        with:
+          ref: gh-pages
+
+      - name: Delete preview and history, then push
+        run: |
+            # A PR that never deployed a preview has nothing to delete or push.
+            if [ -d "previews/PR$PRNUM" ]; then
+                git config user.name "Documenter.jl"
+                git config user.email "documenter@juliadocs.github.io"
+                git rm -rf "previews/PR$PRNUM"
+                git commit -m "delete preview"
+                git branch gh-pages-new "$(echo "delete history" | git commit-tree "HEAD^{tree}")"
+                git push --force origin gh-pages-new:gh-pages
+            fi
+        env:
+            PRNUM: ${{ github.event.number }}
diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml
new file mode 100644
index 0000000..b14eea5
--- /dev/null
+++ b/.github/workflows/Documentation.yml
@@ -0,0 +1,28 @@
+name: Documentation
+
+on:
+  push:
+    branches:
+      - main
+    tags: '*' # any pushed tag also triggers a build
+  pull_request:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }} # one group per workflow and git ref
+  cancel-in-progress: true # a newer run in the same group cancels the one in progress
+
+jobs:
+  docbuild:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: '1.10'
+      - name: Install dependencies
+        run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
+      - name: Build and deploy
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token
+          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key
+        run: julia --project=docs/ docs/make.jl
diff --git a/Project.toml b/Project.toml
index 4b44887..74ecc4d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -3,16 +3,19 @@ uuid = "0fe1646c-419e-43be-ac14-22321958931b"
 authors = ["CliMA Contributors <clima-software@caltech.edu>"]
 version = "0.1.0"
 
+[deps]
+
 [compat]
 julia = "1.10"
 
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
+InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
 SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Aqua", "JET", "OrderedCollections", "PrettyTables", "SafeTestsets", "Test"]
+test = ["Aqua", "JET", "InteractiveUtils", "OrderedCollections", "PrettyTables", "SafeTestsets", "Test"]
diff --git a/README.md b/README.md
index 8e1705b..9029a3f 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,22 @@
 # UnrolledUtilities.jl
 A Julia package that provides unrolled analogues to functions from `Base` and `Base.Iterators`
+
+|||
+|---------------------:|:----------------------------------------------|
+| **Documentation**    | [![dev][docs-dev-img]][docs-dev-url]          |
+| **Docs Build**       | [![docs build][docs-bld-img]][docs-bld-url]   |
+| **GHA CI**           | [![gha ci][gha-ci-img]][gha-ci-url]           |
+| **Code Coverage**    | [![codecov][codecov-img]][codecov-url]        |
+
+[docs-dev-img]: https://img.shields.io/badge/docs-dev-blue.svg
+[docs-dev-url]: https://CliMA.github.io/UnrolledUtilities.jl/dev/
+
+[docs-bld-img]: https://github.com/CliMA/UnrolledUtilities.jl/actions/workflows/Documentation.yml/badge.svg
+[docs-bld-url]: https://github.com/CliMA/UnrolledUtilities.jl/actions/workflows/Documentation.yml
+
+[gha-ci-img]: https://github.com/CliMA/UnrolledUtilities.jl/actions/workflows/ci.yml/badge.svg
+[gha-ci-url]: https://github.com/CliMA/UnrolledUtilities.jl/actions/workflows/ci.yml
+
+[codecov-img]: https://codecov.io/gh/CliMA/UnrolledUtilities.jl/branch/main/graph/badge.svg
+[codecov-url]: https://codecov.io/gh/CliMA/UnrolledUtilities.jl
+
diff --git a/docs/Project.toml b/docs/Project.toml
new file mode 100644
index 0000000..db338c9
--- /dev/null
+++ b/docs/Project.toml
@@ -0,0 +1,8 @@
+[deps]
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
+OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
+SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/docs/make.jl b/docs/make.jl
new file mode 100644
index 0000000..ee728d5
--- /dev/null
+++ b/docs/make.jl
@@ -0,0 +1,34 @@
+using Documenter
+# Running the test file defines print_comparison_table and fills in its data.
+include(joinpath("..", "test", "test_and_analyze.jl"))
+# Anchor to this script's directory so the page lands in docs/src from any cwd.
+comparison_table_file = joinpath(@__DIR__, "src", "comparison_table.md")
+
+open(comparison_table_file, "w") do io
+    println(io, "# Comparison Table\n```@raw html")
+    println(io, "<div style=\"width: max(80vw, 100%)\">") # use 80% of viewport
+    print_comparison_table(io, true)
+    println(io, "</div>")
+    println(io, "```")
+end
+
+makedocs(;
+    sitename = "UnrolledUtilities.jl",
+    modules = [UnrolledUtilities],
+    pages = ["Home" => "index.md", "Comparison Table" => "comparison_table.md"],
+    format = Documenter.HTML(
+        prettyurls = get(ENV, "CI", nothing) == "true",
+        size_threshold_ignore = ["comparison_table.md"],
+    ),
+    clean = true,
+)
+# The page is regenerated on every build, so remove it from the source tree.
+rm(comparison_table_file)
+
+deploydocs(
+    repo = "github.com/CliMA/UnrolledUtilities.jl.git",
+    target = "build",
+    devbranch = "main",
+    push_preview = true,
+    forcepush = true,
+)
diff --git a/docs/src/index.md b/docs/src/index.md
new file mode 100644
index 0000000..4d4007a
--- /dev/null
+++ b/docs/src/index.md
@@ -0,0 +1,40 @@
+#  UnrolledUtilities.jl
+
+A collection of generated functions in which all loops are unrolled and inlined.
+
+The functions exported by this module are
+- `unrolled_any(f, itr)`: similar to `any`
+- `unrolled_all(f, itr)`: similar to `all`
+- `unrolled_foreach(f, itrs...)`: similar to `foreach`
+- `unrolled_map(f, itrs...)`: similar to `map`
+- `unrolled_reduce(op, itr; [init])`: similar to `reduce`
+- `unrolled_mapreduce(f, op, itrs...; [init])`: similar to `mapreduce`
+- `unrolled_zip(itrs...)`: similar to `zip`
+- `unrolled_in(item, itr)`: similar to `in`
+- `unrolled_unique(itr)`: similar to `unique`
+- `unrolled_filter(f, itr)`: similar to `filter`
+- `unrolled_split(f, itr)`: similar to `(filter(f, itr), filter(!f, itr))`, but
+  without duplicate calls to `f`
+- `unrolled_flatten(itr)`: similar to `Iterators.flatten`
+- `unrolled_flatmap(f, itrs...)`: similar to `Iterators.flatmap`
+- `unrolled_product(itrs...)`: similar to `Iterators.product`
+- `unrolled_take(itr, ::Val{N})`: similar to `Iterators.take`, but with the
+  second argument wrapped in a `Val`
+- `unrolled_drop(itr, ::Val{N})`: similar to `Iterators.drop`, but with the
+  second argument wrapped in a `Val`
+
+These functions are guaranteed to be type-stable whenever they are given
+iterators with inferrable lengths and element types, including when
+- the iterators have nonuniform element types (with the exception of `map`, all
+  of the corresponding functions from `Base` encounter type-instabilities and
+  allocations when this is the case)
+- the iterators have many elements (e.g., more than 32, which is the threshold
+  at which `map` becomes type-unstable for `Tuple`s)
+- `f` and/or `op` recursively call the function to which they are passed, with an
+  arbitrarily large recursion depth (e.g., if `f` calls `map(f, itrs)`, it will
+  be type-unstable when the recursion depth exceeds 3, but this will not be the
+  case with `unrolled_map`)
+
+Moreover, these functions are very likely to be optimized out through constant
+propagation when the iterators have singleton element types (and when the result
+of calling `f` and/or `op` on these elements is inferrable).
diff --git a/src/UnrolledUtilities.jl b/src/UnrolledUtilities.jl
index 1563d17..d0931b9 100644
--- a/src/UnrolledUtilities.jl
+++ b/src/UnrolledUtilities.jl
@@ -1,45 +1,3 @@
-"""
-    UnrolledUtilities
-
-A collection of generated functions in which all loops are unrolled and inlined.
-
-The functions exported by this module are
-- `unrolled_any(f, itr)`: similar to `any`
-- `unrolled_all(f, itr)`: similar to `all`
-- `unrolled_foreach(f, itrs...)`: similar to `foreach`
-- `unrolled_map(f, itrs...)`: similar to `map`
-- `unrolled_reduce(op, itr; [init])`: similar to `reduce`
-- `unrolled_mapreduce(f, op, itrs...; [init])`: similar to `mapreduce`
-- `unrolled_zip(itrs...)`: similar to `zip`
-- `unrolled_in(item, itr)`: similar to `in`
-- `unrolled_unique(itr)`: similar to `unique`
-- `unrolled_filter(f, itr)`: similar to `filter`
-- `unrolled_split(f, itr)`: similar to `(filter(f, itr), filter(!f, itr))`, but
-  without duplicate calls to `f`
-- `unrolled_flatten(itr)`: similar to `Iterators.flatten`
-- `unrolled_flatmap(f, itrs...)`: similar to `Iterators.flatmap`
-- `unrolled_product(itrs...)`: similar to `Iterators.product`
-- `unrolled_take(itr, ::Val{N})`: similar to `Iterators.take`, but with the
-  second argument wrapped in a `Val`
-- `unrolled_drop(itr, ::Val{N})`: similar to `Iterators.drop`, but with the
-  second argument wrapped in a `Val`
-
-These functions are guaranteed to be type-stable whenever they are given
-iterators with inferrable lengths and element types, including when
-- the iterators have nonuniform element types (with the exception of `map`, all
-  of the corresponding functions from `Base` encounter type-instabilities and
-  allocations when this is the case)
-- the iterators have many elements (e.g., more than 32, which is the threshold
-  at which `map` becomes type-unstable for `Tuple`s)
-- `f` and/or `op` recursively call the function to which they is passed, with an
-  arbitrarily large recursion depth (e.g., if `f` calls `map(f, itrs)`, it will
-  be type-unstable when the recursion depth exceeds 3, but this will not be the
-  case with `unrolled_map`)
-
-Moreover, these functions are very likely to be optimized out through constant
-propagation when the iterators have singleton element types (and when the result
-of calling `f` and/or `op` on these elements is inferrable).
-"""
 module UnrolledUtilities
 
 export unrolled_any,
@@ -73,7 +31,7 @@ function zipped_f_exprs(itr_types)
     return (:(f($((:(itrs[$l][$n]) for l in 1:L)...))) for n in 1:N)
 end
 @inline @generated unrolled_foreach(f, itrs...) =
-    Expr(:block, zipped_f_exprs(itrs)...)
+    Expr(:block, zipped_f_exprs(itrs)..., nothing)
 @inline @generated unrolled_map(f, itrs...) =
     Expr(:tuple, zipped_f_exprs(itrs)...)
 
@@ -89,8 +47,8 @@ struct NoInit end
 @inline unrolled_reduce(op, itr; init = NoInit()) =
     unrolled_reduce_without_init(op, init isa NoInit ? itr : (init, itr...))
 
-@inline unrolled_mapreduce(f, op, itrs...; init_kwarg...) =
-    unrolled_reduce(op, unrolled_map(f, itrs...); init_kwarg...)
+@inline unrolled_mapreduce(f, op, itrs...; init = NoInit()) =
+    unrolled_reduce(op, unrolled_map(f, itrs...); init)
 
 @inline unrolled_zip(itrs...) = unrolled_map(tuple, itrs...)
 
diff --git a/test/runtests.jl b/test/runtests.jl
index be00ae4..631181c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,6 +1,9 @@
 using SafeTestsets
 
-#! format: off
-@safetestset "Test and Analyze" begin @time include("test_and_analyze.jl") end
-@safetestset "Aqua" begin @time include("aqua.jl") end
-#! format: on
+@safetestset "Test and Analyze" begin
+    @time include("test_and_analyze.jl")
+    print_comparison_table()
+end
+@safetestset "Aqua" begin
+    @time include("aqua.jl")
+end
diff --git a/test/test_and_analyze.jl b/test/test_and_analyze.jl
index 74ba3d8..83ef9e0 100644
--- a/test/test_and_analyze.jl
+++ b/test/test_and_analyze.jl
@@ -2,10 +2,102 @@ using Test
 using JET
 using OrderedCollections
 using PrettyTables
+using InteractiveUtils
 
 using UnrolledUtilities
 
-measurements_dict = OrderedDict()
+comparison_table_dict = OrderedDict()
+
+function print_comparison_table(io = stdout, generate_html = false) # writes the table to io (as HTML if generate_html)
+    table_data =
+        mapreduce(vcat, collect(comparison_table_dict)) do (key, entries)
+            stack(entry -> (key..., entry...), entries; dims = 1)
+        end
+    # Crayon takes named colors as Symbols; HtmlDecoration takes color strings.
+    highlighter(f, color) =
+        generate_html ? HtmlHighlighter(f, HtmlDecoration(; color)) :
+        Highlighter(f, Crayon(; foreground = Symbol(color)))
+    # Columns 4-5 are the performance labels, 6-7 the time/memory comparisons.
+    better_performance_but_harder_to_compile =
+        highlighter(generate_html ? "royalblue" : "blue") do data, i, j
+            data[i, 4] != data[i, 5] &&
+                (endswith(data[i, 6], "slower") || endswith(data[i, 7], "more"))
+        end
+    better_performance =
+        highlighter(generate_html ? "mediumseagreen" : "green") do data, i, j
+            data[i, 4] != data[i, 5]
+        end
+    mixed_compilation =
+        highlighter(generate_html ? "mediumorchid" : "magenta") do data, i, j
+            (endswith(data[i, 6], "slower") && endswith(data[i, 7], "less")) ||
+                (endswith(data[i, 6], "faster") && endswith(data[i, 7], "more"))
+        end
+    harder_to_compile =
+        highlighter(generate_html ? "indianred" : "red") do data, i, j
+            endswith(data[i, 6], "slower") || endswith(data[i, 7], "more")
+        end
+    easier_to_compile =
+        highlighter(generate_html ? "darkturquoise" : "cyan") do data, i, j
+            endswith(data[i, 6], "faster") || endswith(data[i, 7], "less")
+        end
+    no_difference =
+        highlighter((data, i, j) -> true, generate_html ? "khaki" : "yellow")
+    # Backend-specific options: HTML styling versus text-terminal layout.
+    other_kwargs =
+        generate_html ?
+        (;
+            backend = Val(:html),
+            table_style = Dict(
+                "font-family" => "monospace",
+                "font-size" => "80%",
+            ),
+        ) :
+        (;
+            title_same_width_as_table = true,
+            columns_width = [45, 45, 0, 0, 0, 0, 0],
+            linebreaks = true,
+            autowrap = true,
+            crop = :none,
+        )
+    # The first highlighter whose predicate matches wins, so order sets priority.
+    pretty_table(
+        io,
+        table_data;
+        title = "Comparison of UnrolledUtilities to Base and Base.Iterators",
+        title_alignment = :c,
+        alignment = :l,
+        header = [
+            "Unrolled Expression",
+            "Reference Expression",
+            "Iterator Contents",
+            "Unrolled Performance",
+            "Reference Performance",
+            "Unrolled Compile+Run Time",
+            "Unrolled Compile+Run Memory",
+        ],
+        highlighters = (
+            better_performance_but_harder_to_compile,
+            better_performance,
+            mixed_compilation,
+            harder_to_compile,
+            easier_to_compile,
+            no_difference,
+        ),
+        other_kwargs...,
+    )
+end
+
+function drop_line_numbers(expr)
+    expr isa Expr || return expr
+    # Clean the children first; only :block expressions shed LineNumberNodes,
+    # and a block left with a single statement collapses to that statement.
+    is_block = expr.head == :block
+    args = [drop_line_numbers(arg) for arg in expr.args]
+    is_block && (args = filter(arg -> !(arg isa LineNumberNode), args))
+    return is_block && length(args) == 1 ? args[1] : Expr(expr.head, args...)
+end
+
+simplified_expression_string(expr) = # drops `#= ... =# ` before `@`, squeezes whitespace
+    replace(string(drop_line_numbers(expr)), r"#=.+=# @" => '@', r"\s+" => ' ')
 
 function code_instance(f, args...)
     available_methods = methods(f, Tuple{map(typeof, args)...})
@@ -31,24 +123,24 @@ macro test_unrolled(args_expr, unrolled_expr, reference_expr, contents_info_str)
     arg_names = args_expr.args
     @assert all(arg_name -> arg_name isa Symbol, arg_names)
     args = map(esc, arg_names)
-    unrolled_expr_str =
-        replace(string(unrolled_expr), r"\s*#=.+=#" => "", r"\s+" => ' ')
-    reference_expr_str =
-        replace(string(reference_expr), r"\s*#=.+=#" => "", r"\s+" => ' ')
+    unrolled_expr_str = simplified_expression_string(unrolled_expr)
+    reference_expr_str = simplified_expression_string(reference_expr)
     expr_info_str =
         length(args) == 1 ? "$unrolled_expr_str with 1 iterator that contains" :
         "$unrolled_expr_str with $(length(args)) iterators that each contain"
     quote
         @info "Testing $($expr_info_str) $($(esc(contents_info_str)))"
 
-        unrolled_func($(arg_names...)) = $unrolled_expr
-        reference_func($(arg_names...)) = $reference_expr
+        unrolled_func($(arg_names...)) = $(esc(unrolled_expr))
+        reference_func($(arg_names...)) = $(esc(reference_expr))
 
         # Test for correctness.
         @test unrolled_func($(args...)) == reference_func($(args...))
 
-        unrolled_func_and_nothing($(arg_names...)) = ($unrolled_expr; nothing)
-        reference_func_and_nothing($(arg_names...)) = ($reference_expr; nothing)
+        unrolled_func_and_nothing($(arg_names...)) =
+            ($(esc(unrolled_expr)); nothing)
+        reference_func_and_nothing($(arg_names...)) =
+            ($(esc(reference_expr)); nothing)
 
         unrolled_func_and_nothing($(args...)) # Run once to compile.
         reference_func_and_nothing($(args...))
@@ -71,6 +163,17 @@ macro test_unrolled(args_expr, unrolled_expr, reference_expr, contents_info_str)
         Base.issingletontype(typeof(($(args...),))) && @test is_unrolled_const
         is_reference_const = isdefined(reference_instance, :rettype_const)
 
+        buffer = IOBuffer()
+
+        # Check whether the functions are fully optimized out.
+        args_type = Tuple{map(typeof, ($(args...),))...}
+        code_llvm(buffer, unrolled_func, args_type; debuginfo = :none)
+        is_unrolled_optimized_out =
+            length(split(String(take!(buffer)), '\n')) == 5
+        code_llvm(buffer, reference_func, args_type; debuginfo = :none)
+        is_reference_optimized_out =
+            length(split(String(take!(buffer)), '\n')) == 5
+
         arg_name_strs = ($(map(string, arg_names)...),)
         arg_names_str = join(arg_name_strs, ", ")
         arg_definition_strs =
@@ -90,27 +193,41 @@ macro test_unrolled(args_expr, unrolled_expr, reference_expr, contents_info_str)
             print(stats.time, ',', stats.bytes)
             """
 
-        # Get the compilation times and allocations.
-        buffer1 = IOBuffer()
-        run(pipeline(`julia --project -e $unrolled_command_str`, buffer1))
-        unrolled_time, unrolled_allocs =
-            parse.((Float64, Int), split(String(take!(buffer1)), ','))
-        close(buffer1)
-        buffer2 = IOBuffer()
-        run(pipeline(`julia --project -e $reference_command_str`, buffer2))
-        reference_time, reference_allocs =
-            parse.((Float64, Int), split(String(take!(buffer2)), ','))
-        close(buffer2)
-
-        # Record all of the measurements.
-        unrolled_performance_str =
-            is_unrolled_const ? "constant" : "type-stable"
+        # Get the unrolled function's time-to-first-run and its memory usage.
+        run(pipeline(`julia --project -e $unrolled_command_str`, buffer))
+        unrolled_time, unrolled_memory =
+            parse.((Float64, Int), split(String(take!(buffer)), ','))
+
+        # Make a new buffer to avoid a potential data race:
+        # https://discourse.julialang.org/t/iobuffer-becomes-not-writable-after-run/92323/3
+        close(buffer)
+        buffer = IOBuffer()
+
+        # Get the reference function's time-to-first-run and its memory usage.
+        run(pipeline(`julia --project -e $reference_command_str`, buffer))
+        reference_time, reference_memory =
+            parse.((Float64, Int), split(String(take!(buffer)), ','))
+
+        close(buffer)
+
+        # Record all relevant information in comparison_table_dict.
+        unrolled_performance_str = if !is_unrolled_const
+            "type-stable"
+        elseif !is_unrolled_optimized_out
+            "const return value"
+        else
+            "fully optimized out"
+        end
         reference_performance_str = if !is_reference_non_allocating
             "allocating"
         elseif !is_reference_stable
             "type-unstable"
+        elseif !is_reference_const
+            "type-stable"
+        elseif !is_reference_optimized_out
+            "const return value"
         else
-            is_reference_const ? "constant" : "type-stable"
+            "fully optimized out"
         end
         time_ratio = unrolled_time / reference_time
         time_ratio_str = if time_ratio >= 1.5
@@ -120,26 +237,26 @@ macro test_unrolled(args_expr, unrolled_expr, reference_expr, contents_info_str)
         else
             "similar"
         end
-        allocs_ratio = unrolled_allocs / reference_allocs
-        allocs_ratio_str = if allocs_ratio >= 1.5
-            "$(round(Int, allocs_ratio)) times more"
-        elseif inv(allocs_ratio) >= 1.5
-            "$(round(Int, inv(allocs_ratio))) times less"
+        memory_ratio = unrolled_memory / reference_memory
+        memory_ratio_str = if memory_ratio >= 1.5
+            "$(round(Int, memory_ratio)) times more"
+        elseif inv(memory_ratio) >= 1.5
+            "$(round(Int, inv(memory_ratio))) times less"
         else
             "similar"
         end
-        measurement_key = ($unrolled_expr_str, $reference_expr_str)
-        measurement_entry = (
+        dict_key = ($unrolled_expr_str, $reference_expr_str)
+        dict_entry = (
             $(esc(contents_info_str)),
             unrolled_performance_str,
             reference_performance_str,
             time_ratio_str,
-            allocs_ratio_str,
+            memory_ratio_str,
         )
-        if measurement_key in keys(measurements_dict)
-            push!(measurements_dict[measurement_key], measurement_entry)
+        if dict_key in keys(comparison_table_dict)
+            push!(comparison_table_dict[dict_key], dict_entry)
         else
-            measurements_dict[measurement_key] = [measurement_entry]
+            comparison_table_dict[dict_key] = [dict_entry]
         end
     end
 end
@@ -159,10 +276,10 @@ end
     )
 end
 
-for n in (1, 10, 33), all_identical in (n == 1 ? (true,) : (true, false))
-    itr1 = ntuple(i -> ntuple(Val, all_identical ? 0 : (i - 1) % 7), n)
-    itr2 = ntuple(i -> ntuple(Val, all_identical ? 1 : (i - 1) % 7 + 1), n)
-    itr3 = ntuple(i -> ntuple(identity, all_identical ? 1 : (i - 1) % 7 + 1), n)
+for n in (1, 8, 32, 33, 128), identical in (n == 1 ? (true,) : (true, false))
+    itr1 = ntuple(i -> ntuple(Val, identical ? 0 : (i - 1) % 7), n)
+    itr2 = ntuple(i -> ntuple(Val, identical ? 1 : (i - 1) % 7 + 1), n)
+    itr3 = ntuple(i -> ntuple(identity, identical ? 1 : (i - 1) % 7 + 1), n)
     if n == 1
         str1 = "1 empty tuple"
         str2 = "1 nonempty singleton tuple"
@@ -170,7 +287,7 @@ for n in (1, 10, 33), all_identical in (n == 1 ? (true,) : (true, false))
         str12 = "1 singleton tuple"
         str23 = "1 nonempty tuple"
         str123 = "1 tuple"
-    elseif all_identical
+    elseif identical
         str1 = "$n empty tuples"
         str2 = "$n identical nonempty singleton tuples"
         str3 = "$n identical nonempty non-singleton tuples"
@@ -195,8 +312,8 @@ for n in (1, 10, 33), all_identical in (n == 1 ? (true,) : (true, false))
 
             @test_unrolled(
                 (itr,),
-                unrolled_foreach(x -> (@assert length(x) <= 7), itr),
-                foreach(x -> (@assert length(x) <= 7), itr),
+                unrolled_foreach(x -> @assert(length(x) <= 7), itr),
+                foreach(x -> @assert(length(x) <= 7), itr),
                 str,
             )
 
@@ -229,6 +346,7 @@ for n in (1, 10, 33), all_identical in (n == 1 ? (true,) : (true, false))
             @test_unrolled (itr,) unrolled_in(itr[1], itr) (itr[1] in itr) str
             @test_unrolled (itr,) unrolled_in(itr[end], itr) (itr[end] in itr) str
 
+            # unrolled_unique is only type-stable for singletons
             if Base.issingletontype(typeof(itr))
                 @test_unrolled (itr,) unrolled_unique(itr) Tuple(unique(itr)) str
             end
@@ -301,21 +419,21 @@ for n in (1, 10, 33), all_identical in (n == 1 ? (true,) : (true, false))
         @test_unrolled(
             (itr1, itr2),
             unrolled_foreach(
-                (x1, x2) -> (@assert length(x1) < length(x2)),
+                (x1, x2) -> @assert(length(x1) < length(x2)),
                 itr1,
                 itr2,
             ),
-            foreach((x1, x2) -> (@assert length(x1) < length(x2)), itr1, itr2),
+            foreach((x1, x2) -> @assert(length(x1) < length(x2)), itr1, itr2),
             str12,
         )
         @test_unrolled(
             (itr2, itr3),
             unrolled_foreach(
-                (x2, x3) -> (@assert x2 == unrolled_map(Val, x3)),
+                (x2, x3) -> @assert(x2 == unrolled_map(Val, x3)),
                 itr2,
                 itr3,
             ),
-            foreach((x2, x3) -> (@assert x2 == map(Val, x3)), itr2, itr3),
+            foreach((x2, x3) -> @assert(x2 == map(Val, x3)), itr2, itr3),
             str23,
         )
 
@@ -332,13 +450,16 @@ for n in (1, 10, 33), all_identical in (n == 1 ? (true,) : (true, false))
             str123,
         )
 
-        @test_unrolled(
-            (itr1, itr2),
-            unrolled_product(itr1, itr2),
-            Tuple(Iterators.product(itr1, itr2)),
-            str12,
-        )
-        if n <= 10 # This can take several minutes to compile when n is large.
+        # unrolled_product can take several minutes to compile when n is large
+        if n <= 33
+            @test_unrolled(
+                (itr1, itr2),
+                unrolled_product(itr1, itr2),
+                Tuple(Iterators.product(itr1, itr2)),
+                str12,
+            )
+        end
+        if n <= 8
             @test_unrolled(
                 (itr1, itr2, itr3),
                 unrolled_product(itr1, itr2, itr3),
@@ -349,49 +470,52 @@ for n in (1, 10, 33), all_identical in (n == 1 ? (true,) : (true, false))
     end
 end
 
-table_data = mapreduce(vcat, collect(measurements_dict)) do (key, entries)
-    stack(entry -> (key..., entry...), entries; dims = 1)
-end
-header_line1 = [
-    "Unrolled Expression",
-    "Reference Expression",
-    "Iterator Contents",
-    "Unrolled Performance",
-    "Reference Performance",
-    "Compilation Time",
-    "Compilation Memory",
-]
-header_line2 =
-    ["", "", "", "", "", "(Unrolled vs. Reference)", "(Unrolled vs. Reference)"]
-better_performance_but_harder_to_compile =
-    Highlighter(crayon"blue") do data, i, j
-        data[i, 4] != data[i, 5] &&
-            (endswith(data[i, 6], "slower") || endswith(data[i, 7], "more"))
+function nested_iterator(depth, n, inner_n) # inner_n children per nested level
+    depth == 1 && return ntuple(identity, n)
+    child(_) = nested_iterator(depth - 1, Int(n / inner_n), inner_n)
+    return ntuple(child, inner_n)
+end
+
+for n in (8, 32, 128)
+    @testset "iterators of $n values in nested tuples" begin
+        for depth in (2, 3, 4:2:(Int(log2(n)) + 1)...)
+            itr = nested_iterator(depth, n, 2)
+            str = "$n values in nested tuples of depth $depth"
+            # In the following definitions, use var"#self#" to avoid boxing:
+            # https://discourse.julialang.org/t/performant-recursive-anonymous-functions/90984/5
+            @test_unrolled(
+                (itr,),
+                map(
+                    x ->
+                        eltype(x) <: Tuple ?
+                        unrolled_mapreduce(var"#self#", +, x) : length(x),
+                    (itr,),
+                )[1],
+                map(
+                    x ->
+                        eltype(x) <: Tuple ? mapreduce(var"#self#", +, x) :
+                        length(x),
+                    (itr,),
+                )[1],
+                str,
+            ) # nested iterator length
+            @test_unrolled(
+                (itr,),
+                map(
+                    x ->
+                        eltype(x) <: Tuple ?
+                        unrolled_mapreduce(var"#self#", +, x) :
+                        unrolled_reduce(+, x),
+                    (itr,),
+                )[1],
+                map(
+                    x ->
+                        eltype(x) <: Tuple ? mapreduce(var"#self#", +, x) :
+                        reduce(+, x),
+                    (itr,),
+                )[1],
+                str,
+            ) # nested iterator sum
+        end
     end
-better_performance =
-    Highlighter((data, i, j) -> data[i, 4] != data[i, 5], crayon"green")
-harder_to_compile = Highlighter(crayon"red") do data, i, j
-    endswith(data[i, 6], "slower") || endswith(data[i, 7], "more")
-end
-easier_to_compile = Highlighter(crayon"magenta") do data, i, j
-    endswith(data[i, 6], "faster") || endswith(data[i, 7], "less")
 end
-no_difference = Highlighter((data, i, j) -> true, crayon"yellow")
-pretty_table(
-    table_data;
-    title = "Comparison between UnrolledUtilities and Base/Base.Iterators",
-    header = (header_line1, header_line2),
-    subheader_crayon = crayon"bold",
-    highlighters = (
-        better_performance_but_harder_to_compile,
-        better_performance,
-        harder_to_compile,
-        easier_to_compile,
-        no_difference,
-    ),
-    title_same_width_as_table = true,
-    title_alignment = :c,
-    alignment = :l,
-    columns_width = [45, 45, 0, 0, 0, 0, 0],
-    crop = :none,
-)