From e2d432d842e475031b4f19d6fbd07178eb515f40 Mon Sep 17 00:00:00 2001
From: Greg Becker <becker33@llnl.gov>
Date: Tue, 8 Oct 2024 21:07:20 -0700
Subject: [PATCH] Add test suite (#384)

* Includes new command `benchpark unit-test`
  This command makes use of pytest, which can accept more arguments
  that are specified as command-line option: add a "pass-through"
  mechanism for arguments that are unknown to the command (but
  e.g. in this case are known to pytest)
* Add a `spack.yaml` environment file which makes it easier to
  install the resources needed to run unit tests (e.g. pytest);
  these are not included in Benchpark's `requirements.txt`
* Tests are added for Spec syntax and for the Experiment class.
  Coverage is currently low but that is a matter for future PRs.
* This includes a minor refactor of the logic used in the library
  code to locate Benchpark's root directory (previously
  `source_location`, now accessable as `paths.benchpark_root`)

---------

Co-authored-by: Alec Scott <scott112@llnl.gov>
Co-authored-by: pearce8 <pearce8@llnl.gov>
Co-authored-by: Peter Josef Scheibel <scheibel1@llnl.gov>
---
 .envrc                            |  10 ++
 .github/workflows/run.yml         |   4 +-
 bin/benchpark                     |   6 +-
 lib/benchpark/accounting.py       |   8 +-
 lib/benchpark/cmd/setup.py        |   9 +-
 lib/benchpark/cmd/unit_test.py    | 232 ++++++++++++++++++++++++++++
 lib/benchpark/paths.py            |  15 +-
 lib/benchpark/repo.py             |  12 +-
 lib/benchpark/runtime.py          |  11 +-
 lib/benchpark/spec.py             | 122 ++++++++++++---
 lib/benchpark/test/__init__.py    |   0
 lib/benchpark/test/conftest.py    |   0
 lib/benchpark/test/experiment.py  |  81 ++++++++++
 lib/benchpark/test/paths.py       |  13 ++
 lib/benchpark/test/spec_syntax.py | 248 ++++++++++++++++++++++++++++++
 lib/main.py                       |  40 ++++-
 pytest.ini                        |   4 +
 spack.yaml                        |  13 ++
 18 files changed, 772 insertions(+), 56 deletions(-)
 create mode 100644 .envrc
 create mode 100644 lib/benchpark/cmd/unit_test.py
 create mode 100644 lib/benchpark/test/__init__.py
 create mode 100644 lib/benchpark/test/conftest.py
 create mode 100644 lib/benchpark/test/experiment.py
 create mode 100644 lib/benchpark/test/paths.py
 create mode 100644 lib/benchpark/test/spec_syntax.py
 create mode 100644 pytest.ini
 create mode 100644 spack.yaml

diff --git a/.envrc b/.envrc
new file mode 100644
index 000000000..41a5bd355
--- /dev/null
+++ b/.envrc
@@ -0,0 +1,10 @@
+#------------------------------------------------------------------------
+# Load Development Spack Environment (If Spack is installed.)
+#
+# Run 'direnv allow' from within the cloned repository to automatically
+# load the spack environment when you enter the directory.
+#------------------------------------------------------------------------
+if type spack &>/dev/null; then
+    . $SPACK_ROOT/share/spack/setup-env.sh
+    spack env activate -d .
+fi
diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml
index 51fb0c153..09149f5c2 100644
--- a/.github/workflows/run.yml
+++ b/.github/workflows/run.yml
@@ -179,7 +179,7 @@ jobs:
           ./bin/benchpark setup kripke/rocm ./tioga-system workspace/
           . workspace/setup.sh
           ramble \
-            --workspace-dir workspace/kripke/rocm/Tioga-d34a754/workspace \
+            --workspace-dir workspace/kripke/rocm/Tioga-975af3c/workspace \
             --disable-progress-bar \
             --disable-logger \
             workspace setup --dry-run
@@ -202,7 +202,7 @@ jobs:
           ./bin/benchpark setup ./saxpy-rocm2 ./tioga-system2 workspace/
           . workspace/setup.sh
           ramble \
-            --workspace-dir workspace/saxpy-rocm2/Tioga-d34a754/workspace \
+            --workspace-dir workspace/saxpy-rocm2/Tioga-975af3c/workspace \
             --disable-progress-bar \
             --disable-logger \
             workspace setup --dry-run
diff --git a/bin/benchpark b/bin/benchpark
index dcbf5de63..32b39a43d 100755
--- a/bin/benchpark
+++ b/bin/benchpark
@@ -12,9 +12,9 @@ import sys
 
 
 def main():
-    basedir = pathlib.Path(os.path.abspath(__file__)).parent.parent
-    main = basedir / "lib" / "main.py"
-    subprocess.run([sys.executable, main] + sys.argv[1:], check=True)
+    basedir = pathlib.Path(__file__).resolve().parents[1]
+    main_py = basedir / "lib" / "main.py"
+    subprocess.run([sys.executable, main_py] + sys.argv[1:], check=True)
 
 
 if __name__ == "__main__":
diff --git a/lib/benchpark/accounting.py b/lib/benchpark/accounting.py
index 443b41cff..fc00451b6 100644
--- a/lib/benchpark/accounting.py
+++ b/lib/benchpark/accounting.py
@@ -5,11 +5,11 @@
 
 import os
 
-from benchpark.paths import source_location
+import benchpark.paths
 
 
 def benchpark_experiments():
-    source_dir = source_location()
+    source_dir = benchpark.paths.benchpark_root
     experiments = []
     experiments_dir = source_dir / "experiments"
     for x in os.listdir(experiments_dir):
@@ -19,7 +19,7 @@ def benchpark_experiments():
 
 
 def benchpark_modifiers():
-    source_dir = source_location()
+    source_dir = benchpark.paths.benchpark_root
     modifiers = []
     for x in os.listdir(source_dir / "modifiers"):
         modifiers.append(x)
@@ -27,7 +27,7 @@ def benchpark_modifiers():
 
 
 def benchpark_systems():
-    source_dir = source_location()
+    source_dir = benchpark.paths.benchpark_root
     systems = []
     for x in os.listdir(source_dir / "configs"):
         if not (
diff --git a/lib/benchpark/cmd/setup.py b/lib/benchpark/cmd/setup.py
index 599afab76..169141499 100644
--- a/lib/benchpark/cmd/setup.py
+++ b/lib/benchpark/cmd/setup.py
@@ -9,15 +9,16 @@
 import pathlib
 import shutil
 import sys
+
 import yaml
 
+import benchpark.paths
 from benchpark.accounting import (
     benchpark_experiments,
     benchpark_modifiers,
     benchpark_systems,
 )
 from benchpark.debug import debug_print
-from benchpark.paths import source_location
 from benchpark.runtime import RuntimeResources
 
 
@@ -87,7 +88,7 @@ def benchpark_check_experiment(arg_str):
         )
         raise ValueError(out_str)
 
-    experiment_src_dir = source_location() / "experiments" / str(arg_str)
+    experiment_src_dir = benchpark.paths.benchpark_root / "experiments" / str(arg_str)
     return arg_str, experiment_src_dir
 
 
@@ -115,7 +116,7 @@ def benchpark_check_system(arg_str):
         )
         raise ValueError(out_str)
 
-    configs_src_dir = source_location() / "configs" / str(arg_str)
+    configs_src_dir = benchpark.paths.benchpark_root / "configs" / str(arg_str)
     return arg_str, configs_src_dir
 
 
@@ -145,7 +146,7 @@ def command(args):
 
     experiments_root = pathlib.Path(os.path.abspath(args.experiments_root))
     modifier = args.modifier
-    source_dir = source_location()
+    source_dir = benchpark.paths.benchpark_root
     debug_print(f"source_dir = {source_dir}")
     experiment_id, experiment_src_dir = benchpark_check_experiment(args.experiment)
     debug_print(f"specified experiment (benchmark/ProgrammingModel) = {experiment_id}")
diff --git a/lib/benchpark/cmd/unit_test.py b/lib/benchpark/cmd/unit_test.py
new file mode 100644
index 000000000..954d8593c
--- /dev/null
+++ b/lib/benchpark/cmd/unit_test.py
@@ -0,0 +1,232 @@
+# Copyright 2023 Lawrence Livermore National Security, LLC and other
+# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: Apache-2.0
+import argparse
+import collections
+import io
+import pytest
+import os
+import re
+import sys
+
+import llnl.util.filesystem
+import llnl.util.tty.color as color
+import llnl.util.tty.colify as colify
+
+import benchpark.paths
+
+
+def setup_parser(subparser):
+    subparser.add_argument(
+        "-H",
+        "--pytest-help",
+        action="store_true",
+        default=False,
+        help="show full pytest help, with advanced options",
+    )
+
+    subparser.add_argument(
+        "-n",
+        "--numprocesses",
+        type=int,
+        default=1,
+        help="run tests in parallel up to this wide, default 1 for sequential",
+    )
+
+    # extra arguments to list tests
+    list_group = subparser.add_argument_group("listing tests")
+    list_mutex = list_group.add_mutually_exclusive_group()
+    list_mutex.add_argument(
+        "-l",
+        "--list",
+        action="store_const",
+        default=None,
+        dest="list",
+        const="list",
+        help="list test filenames",
+    )
+    list_mutex.add_argument(
+        "-L",
+        "--list-long",
+        action="store_const",
+        default=None,
+        dest="list",
+        const="long",
+        help="list all test functions",
+    )
+    list_mutex.add_argument(
+        "-N",
+        "--list-names",
+        action="store_const",
+        default=None,
+        dest="list",
+        const="names",
+        help="list full names of all tests",
+    )
+
+    # spell out some common pytest arguments, so they'll show up in help
+    pytest_group = subparser.add_argument_group(
+        "common pytest arguments (benchpark unit-test --pytest-help for more)"
+    )
+    pytest_group.add_argument(
+        "-s",
+        action="append_const",
+        dest="parsed_args",
+        const="-s",
+        help="print output while tests run (disable capture)",
+    )
+    pytest_group.add_argument(
+        "-k",
+        action="store",
+        metavar="EXPRESSION",
+        dest="expression",
+        help="filter tests by keyword (can also use w/list options)",
+    )
+    pytest_group.add_argument(
+        "--showlocals",
+        action="append_const",
+        dest="parsed_args",
+        const="--showlocals",
+        help="show local variable values in tracebacks",
+    )
+
+    # remainder is just passed to pytest
+    subparser.add_argument(
+        "pytest_args", nargs=argparse.REMAINDER, help="arguments for pytest"
+    )
+
+
+def do_list(args, extra_args):
+    """Print a lists of tests than what pytest offers."""
+
+    def colorize(c, prefix):
+        if isinstance(prefix, tuple):
+            return "::".join(
+                color.colorize("@%s{%s}" % (c, p)) for p in prefix if p != "()"
+            )
+        return color.colorize("@%s{%s}" % (c, prefix))
+
+    # To list the files we just need to inspect the filesystem,
+    # which doesn't need to wait for pytest collection and doesn't
+    # require parsing pytest output
+    files = llnl.util.filesystem.find(
+        root=benchpark.paths.test_path, files="*.py", recursive=True
+    )
+    files = [
+        os.path.relpath(f, start=benchpark.paths.benchpark_root)
+        for f in files
+        if not f.endswith(("conftest.py", "__init__.py"))
+    ]
+
+    old_output = sys.stdout
+    try:
+        sys.stdout = output = io.StringIO()
+        pytest.main(["--collect-only"] + extra_args)
+    finally:
+        sys.stdout = old_output
+
+    lines = output.getvalue().split("\n")
+    tests = collections.defaultdict(set)
+
+    # collect tests into sections
+    node_regexp = re.compile(r"(\s*)<([^ ]*) ['\"]?([^']*)['\"]?>")
+    key_parts, name_parts = [], []
+    for line in lines:
+        match = node_regexp.match(line)
+        if not match:
+            continue
+        indent, nodetype, name = match.groups()
+
+        # strip parametrized tests
+        if "[" in name:
+            name = name[: name.index("[")]
+
+        len_indent = len(indent)
+        if os.path.isabs(name):
+            name = os.path.relpath(name, start=benchpark.paths.benchpark_root)
+
+        item = (len_indent, name, nodetype)
+
+        # Reduce the parts to the scopes that are of interest
+        name_parts = [x for x in name_parts if x[0] < len_indent]
+        key_parts = [x for x in key_parts if x[0] < len_indent]
+
+        # From version 3.X to version 6.X the output format
+        # changed a lot in pytest, and probably will change
+        # in the future - so this manipulation might be fragile
+        if nodetype.lower() == "function":
+            name_parts.append(item)
+            key_end = os.path.join(*key_parts[-1][1].split("/"))
+            key = next(f for f in files if f.endswith(key_end))
+            tests[key].add(tuple(x[1] for x in name_parts))
+        elif nodetype.lower() == "class":
+            name_parts.append(item)
+        elif nodetype.lower() in ("package", "module"):
+            key_parts.append(item)
+
+    if args.list == "list":
+        files = set(tests.keys())
+        color_files = [colorize("B", file) for file in sorted(files)]
+        colify.colify(color_files)
+
+    elif args.list == "long":
+        for prefix, functions in sorted(tests.items()):
+            path = colorize("*B", prefix) + "::"
+            functions = [colorize("c", f) for f in sorted(functions)]
+            color.cprint(path)
+            colify.colify(functions, indent=4)
+            print()
+
+    else:  # args.list == "names"
+        all_functions = [
+            colorize("*B", prefix) + "::" + colorize("c", f)
+            for prefix, functions in sorted(tests.items())
+            for f in sorted(functions)
+        ]
+        colify.colify(all_functions)
+
+
+def add_back_pytest_args(args, unknown_args):
+    """Add parsed pytest args, unknown args, and remainder together.
+
+    We add some basic pytest arguments to the Spack parser to ensure that
+    they show up in the short help, so we have to reassemble things here.
+    """
+    result = args.parsed_args or []
+    result += unknown_args or []
+    result += args.pytest_args or []
+    if args.expression:
+        result += ["-k", args.expression]
+    return result
+
+
+def command(args, unknown_args):
+    global pytest
+
+    if args.pytest_help:
+        # make the pytest.main help output more accurate
+        sys.argv[0] = "spack unit-test"
+        return pytest.main(["-h"])
+
+    # add back any parsed pytest args we need to pass to pytest
+    pytest_args = add_back_pytest_args(args, unknown_args)
+    pytest_root = benchpark.paths.benchpark_root
+
+    if args.numprocesses is not None and args.numprocesses > 1:
+        pytest_args.extend(
+            [
+                "--dist",
+                "loadfile",
+                "--tx",
+                f"{args.numprocesses}*popen//python=benchpark-tmpconfig benchpark-python",
+            ]
+        )
+
+    # pytest.ini lives in the root of the spack repository.
+    with llnl.util.filesystem.working_dir(pytest_root):
+        if args.list:
+            do_list(args, pytest_args)
+            return
+
+        return pytest.main(pytest_args)
diff --git a/lib/benchpark/paths.py b/lib/benchpark/paths.py
index d6a237669..6a71417f0 100644
--- a/lib/benchpark/paths.py
+++ b/lib/benchpark/paths.py
@@ -6,11 +6,16 @@
 import os
 import pathlib
 
+
+def _source_location() -> pathlib.Path:
+    """Return the location of the project source files directory."""
+    path_to_this_file = __file__
+    return pathlib.Path(path_to_this_file).resolve().parents[2]
+
+
+benchpark_root = _source_location()
+lib_path = benchpark_root / "lib" / "benchpark"
+test_path = lib_path / "test"
 benchpark_home = pathlib.Path(os.path.expanduser("~/.benchpark"))
 global_ramble_path = benchpark_home / "ramble"
 global_spack_path = benchpark_home / "spack"
-
-
-def source_location():
-    the_directory_with_this_file = os.path.dirname(os.path.abspath(__file__))
-    return pathlib.Path(the_directory_with_this_file).parent.parent
diff --git a/lib/benchpark/repo.py b/lib/benchpark/repo.py
index 9f386a643..c75ec5fcf 100644
--- a/lib/benchpark/repo.py
+++ b/lib/benchpark/repo.py
@@ -5,10 +5,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-import sys
 import contextlib
-import pathlib
-
+import sys
 from enum import Enum
 
 import benchpark.paths
@@ -82,10 +80,6 @@ def override_ramble_hardcoded_globals():
     ramble.language.language_base.namespaces = _old[2]
 
 
-def _base_path():
-    return pathlib.Path(__file__).resolve().parents[2]
-
-
 # Experiments
 def _exprs():
     """Get the singleton RepoPath instance for Ramble.
@@ -94,7 +88,7 @@ def _exprs():
 
     TODO: consider not making this a singleton.
     """
-    experiments_repo = _base_path() / "var" / "exp_repo"
+    experiments_repo = benchpark.paths.benchpark_root / "var" / "exp_repo"
     return _add_repo(experiments_repo, ObjectTypes.experiments)
 
 
@@ -112,7 +106,7 @@ def _add_repo(repo_dir, obj_type):
 
 # Systems
 def _systems():
-    systems_repo = _base_path() / "var" / "sys_repo"
+    systems_repo = benchpark.paths.benchpark_root / "var" / "sys_repo"
     return _add_repo(systems_repo, ObjectTypes.systems)
 
 
diff --git a/lib/benchpark/runtime.py b/lib/benchpark/runtime.py
index 1e4fc86dd..6637b5c96 100644
--- a/lib/benchpark/runtime.py
+++ b/lib/benchpark/runtime.py
@@ -2,14 +2,18 @@
 # Benchpark Project Developers. See the top-level COPYRIGHT file for details.
 #
 # SPDX-License-Identifier: Apache-2.0
+
 from contextlib import contextmanager
 import os
 import pathlib
 import shlex
 import subprocess
 import sys
+
 import yaml
 
+import benchpark.paths
+
 DEBUG = False
 
 
@@ -35,11 +39,6 @@ def git_clone_commit(url, commit, destination):
         run_command(f"git checkout {commit}")
 
 
-def benchpark_root():
-    this_module_path = pathlib.Path(os.path.abspath(__file__))
-    return this_module_path.parents[2]
-
-
 def run_command(command_str, env=None):
     proc = subprocess.Popen(
         shlex.split(command_str),
@@ -70,7 +69,7 @@ def __call__(self, *args):
 
 class RuntimeResources:
     def __init__(self, dest):
-        self.root = benchpark_root()
+        self.root = benchpark.paths.benchpark_root
         self.dest = pathlib.Path(dest)
 
         checkout_versions_location = self.root / "checkout-versions.yaml"
diff --git a/lib/benchpark/spec.py b/lib/benchpark/spec.py
index eaae9999d..e69674044 100644
--- a/lib/benchpark/spec.py
+++ b/lib/benchpark/spec.py
@@ -6,6 +6,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import enum
+import io
+import json
 import pathlib
 import re
 from typing import Iterable, Iterator, List, Match, Optional, Union
@@ -66,12 +68,35 @@ def stringify(name: str, values: tuple) -> str:
                 return f"+{name}"
             if values[0].lower() == "false":
                 return f"~{name}"
-        return f"{name}={','.join(values)}"
+        v_string = quote_if_needed(",".join(values))
+        return f"{name}={v_string}"
 
     def __str__(self):
-        return " ".join(
-            self.stringify(name, values) for name, values in self.dict.items()
-        )
+        if not self:
+            return ""
+
+        # print keys in order
+        sorted_keys = sorted(self.keys())
+
+        # Separate boolean variants from key-value pairs as they print
+        # differently. All booleans go first to avoid ' ~foo' strings that
+        # break spec reuse in zsh.
+        bool_keys = []
+        kv_keys = []
+        for key in sorted_keys:
+            is_bool = len(self[key]) == 1 and self[key][0].lower() in ("true", "false")
+            bool_keys.append(key) if is_bool else kv_keys.append(key)
+
+        # add spaces before and after key/value variants.
+        string = io.StringIO()
+
+        string.write("".join(self.stringify(key, self[key]) for key in bool_keys))
+        if bool_keys and kv_keys:
+            string.write(" ")
+
+        string.write(" ".join(self.stringify(key, self[key]) for key in kv_keys))
+
+        return string.getvalue()
 
 
 class ConcreteVariantMap(VariantMap):
@@ -149,9 +174,9 @@ def __str__(self):
             string += self.name
 
         variants = str(self.variants)
-        if not string:
-            return variants
-        string += f" {variants}" if variants else ""
+        if string and variants and not variants.startswith(("+", "~")):
+            string += " "
+        string += variants
         return string
 
     def __repr__(self):
@@ -368,8 +393,14 @@ def system(self) -> "benchpark.System":
 DOTTED_IDENTIFIER = rf"(?:{IDENTIFIER}(?:\.{IDENTIFIER})+)"
 NAME = r"[a-zA-Z_0-9][a-zA-Z_0-9\-.]*"
 
-#: These are legal values that *can* be parsed bare, without quotes on the command line.
-VALUE = r"(?:[a-zA-Z_0-9\-+\*.,:=\~\/\\]+)"
+#: These are legal values that *can* be parsed bare, without quotes on the command line. Cannot start with `=`
+VALUE = r"(?:[a-zA-Z_0-9\-+\*.,:\~\/\\][a-zA-Z_0-9\-+\*.,:=\~\/\\]*)"
+
+#: Variant/flag values that match this can be left unquoted in Spack output
+NO_QUOTES_NEEDED = re.compile(r"^[a-zA-Z0-9,/_.-]+$")
+
+#: Quoted values can be *anything* in between quotes, including escaped quotes.
+QUOTED_VALUE = r"(?:'(?:[^']|(?<=\\)')*'|\"(?:[^\"]|(?<=\\)\")*\")"
 
 #: Regex with groups to use for splitting (optionally propagated) key-value pairs
 SPLIT_KVP = re.compile(rf"^({NAME})=(.*)$")
@@ -401,10 +432,10 @@ class TokenType(TokenBase):
 
     # variants
     BOOL_VARIANT = rf"(?:[~+-]\s*{NAME})"
-    KEY_VALUE_PAIR = rf"(?:{NAME}=(?:{VALUE}))"
+    KEY_VALUE_PAIR = rf"(?:{NAME}=(?:{VALUE}|{QUOTED_VALUE}))"
     # Package name
-    FULLY_QUALIFIED_PACKAGE_NAME = rf"(?:{DOTTED_IDENTIFIER})"
-    UNQUALIFIED_PACKAGE_NAME = rf"(?:{IDENTIFIER})"
+    FULLY_QUALIFIED_SPEC_NAME = rf"(?:{DOTTED_IDENTIFIER})"
+    UNQUALIFIED_SPEC_NAME = rf"(?:{IDENTIFIER})"
     # White spaces
     WS = r"(?:\s+)"
 
@@ -416,6 +447,58 @@ class ErrorTokenType(TokenBase):
     UNEXPECTED = r"(?:.[\s]*)"
 
 
+#: Regex to strip quotes. Group 2 will be the unquoted string.
+STRIP_QUOTES = re.compile(r"^(['\"])(.*)\1$")
+
+
+def quote_kvp(string: str) -> str:
+    """For strings like ``name=value``, quote and escape the value if needed.
+
+    This is a compromise to respect quoting of key-value pairs on the CLI. The shell
+    strips quotes from quoted arguments, so we cannot know *exactly* how CLI arguments
+    were quoted. To compensate, we re-add quotes around anything staritng with ``name=``,
+    and we assume the rest of the argument is the value. This covers the
+    common cases of passing variant arguments with spaces in them, e.g.,
+    ``cflags="-O2 -g"`` on the command line.
+    """
+    match = SPLIT_KVP.match(string)
+    if not match:
+        return string
+
+    key, value = match.groups()
+    return f"{key}={quote_if_needed(value)}"
+
+
+def strip_quotes_and_unescape(string: str) -> str:
+    """Remove surrounding single or double quotes from string, if present."""
+    match = STRIP_QUOTES.match(string)
+    if not match:
+        return string
+
+    # replace any escaped quotes with bare quotes
+    quote, result = match.groups()
+    return result.replace(rf"\{quote}", quote)
+
+
+def quote_if_needed(value: str) -> str:
+    """Add quotes around the value if it requires quotes.
+
+    This will add quotes around the value unless it matches ``NO_QUOTES_NEEDED``.
+
+    This adds:
+    * single quotes by default
+    * double quotes around any value that contains single quotes
+
+    If double quotes are used, we json-escape the string. That is, we escape ``\\``,
+    ``"``, and control codes.
+
+    """
+    if NO_QUOTES_NEEDED.match(value):
+        return value
+
+    return json.dumps(value) if "'" in value else f"'{value}'"
+
+
 class Token:
     """Represents tokens; generated from input by lexer and fed to parse()."""
 
@@ -519,10 +602,13 @@ def expect(self, *kinds: TokenType):
 class SpecParser(object):
     __slots__ = "literal_str", "ctx", "type"
 
-    def __init__(self, type, literal_str: str):
-        self.literal_str = literal_str
+    def __init__(self, type, literal: Union[str, list]):
+        if isinstance(literal, list):
+            self.literal_str = " ".join([quote_kvp(arg) for arg in literal])
+        else:
+            self.literal_str = literal
         self.ctx = TokenContext(
-            filter(lambda x: x.kind != TokenType.WS, tokenize(literal_str))
+            filter(lambda x: x.kind != TokenType.WS, tokenize(self.literal_str))
         )
         self.type = type
 
@@ -549,9 +635,9 @@ def next_spec(self) -> Optional[Spec]:
 
         spec = self.type()
 
-        if self.ctx.accept(TokenType.UNQUALIFIED_PACKAGE_NAME):
+        if self.ctx.accept(TokenType.UNQUALIFIED_SPEC_NAME):
             spec.name = self.ctx.current_token.value
-        elif self.ctx.accept(TokenType.FULLY_QUALIFIED_PACKAGE_NAME):
+        elif self.ctx.accept(TokenType.FULLY_QUALIFIED_SPEC_NAME):
             parts = self.ctx.current_token.value.split(".")
             name = parts[-1]
             namespace = ".".join(parts[:-1])
@@ -570,7 +656,7 @@ def next_spec(self) -> Optional[Spec]:
                 ), f"SPLIT_KVP cannot split pair {self.ctx.current_token.value}"
 
                 name, value = match.groups()
-                spec.variants[name] = value.split(",")
+                spec.variants[name] = strip_quotes_and_unescape(value).split(",")
             else:
                 break
 
diff --git a/lib/benchpark/test/__init__.py b/lib/benchpark/test/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/lib/benchpark/test/conftest.py b/lib/benchpark/test/conftest.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/lib/benchpark/test/experiment.py b/lib/benchpark/test/experiment.py
new file mode 100644
index 000000000..325264cc0
--- /dev/null
+++ b/lib/benchpark/test/experiment.py
@@ -0,0 +1,81 @@
+# Copyright 2023 Lawrence Livermore National Security, LLC and other
+# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: Apache-2.0
+import yaml
+
+import benchpark.experiment
+import benchpark.spec
+
+
+def test_write_yaml(monkeypatch, tmpdir):
+    spec = benchpark.spec.ExperimentSpec("saxpy").concretize()
+    experiment = spec.experiment
+
+    section_names = ["include", "config", "modifiers", "applications", "spack"]
+
+    for name in section_names:
+        monkeypatch.setattr(experiment, f"compute_{name}_section", lambda: True)
+
+    experiment_path = tmpdir.join("experiment_test")
+    experiment.write_ramble_dict(experiment_path)
+
+    with open(experiment_path, "r") as f:
+        output = yaml.safe_load(f)
+
+    assert output == {
+        "ramble": {
+            "software" if name == "spack" else name: True for name in section_names
+        }
+    }
+
+
+def test_compute_ramble_dict(monkeypatch):
+    spec = benchpark.spec.ExperimentSpec("saxpy").concretize()
+    experiment = spec.experiment
+
+    section_names = ["include", "config", "modifiers", "applications", "spack"]
+
+    for name in section_names:
+        monkeypatch.setattr(experiment, f"compute_{name}_section", lambda: True)
+
+    ramble_dict = experiment.compute_ramble_dict()
+
+    assert ramble_dict == {
+        "ramble": {
+            "software" if name == "spack" else name: True for name in section_names
+        }
+    }
+
+
+def test_default_include_section():
+    spec = benchpark.spec.ExperimentSpec("saxpy").concretize()
+    experiment = benchpark.experiment.Experiment(spec)
+
+    include_section = experiment.compute_include_section()
+
+    assert include_section == ["./configs"]
+
+
+def test_default_config_section():
+    spec = benchpark.spec.ExperimentSpec("saxpy").concretize()
+    experiment = benchpark.experiment.Experiment(spec)
+
+    config_section = experiment.compute_config_section()
+
+    assert config_section == {
+        "deprecated": True,
+        "spack_flags": {
+            "install": "--add --keep-stage",
+            "concretize": "-U -f",
+        },
+    }
+
+
+def test_default_modifiers_section():
+    spec = benchpark.spec.ExperimentSpec("saxpy").concretize()
+    experiment = benchpark.experiment.Experiment(spec)
+
+    modifiers_section = experiment.compute_modifiers_section()
+
+    assert modifiers_section == [{"name": "allocation"}]
diff --git a/lib/benchpark/test/paths.py b/lib/benchpark/test/paths.py
new file mode 100644
index 000000000..862124ca0
--- /dev/null
+++ b/lib/benchpark/test/paths.py
@@ -0,0 +1,13 @@
+# Copyright 2023 Lawrence Livermore National Security, LLC and other
+# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import pathlib
+
+import benchpark.paths
+
+
+def test_benchpark_root(pytestconfig):
+    expected_path = pathlib.Path(pytestconfig.inipath).resolve().parent
+    assert benchpark.paths.benchpark_root == expected_path
diff --git a/lib/benchpark/test/spec_syntax.py b/lib/benchpark/test/spec_syntax.py
new file mode 100644
index 000000000..e078cb449
--- /dev/null
+++ b/lib/benchpark/test/spec_syntax.py
@@ -0,0 +1,248 @@
+# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+import pytest
+
+from benchpark.spec import (
+    Spec,
+    SpecParser,
+    SpecTokenizationError,
+    Token,
+    TokenType,
+)
+
+
+def simple_spec_name(name):
+    """A simple spec name in canonical form"""
+    return name, [Token(TokenType.UNQUALIFIED_SPEC_NAME, value=name)], name
+
+
+@pytest.mark.parametrize(
+    "spec_str,tokens,expected_roundtrip",
+    [
+        # Names names
+        simple_spec_name("mvapich"),
+        simple_spec_name("mvapich_foo"),
+        simple_spec_name("_mvapich_foo"),
+        simple_spec_name("3dtk"),
+        simple_spec_name("ns-3-dev"),
+        # Single token anonymous specs
+        ("+foo", [Token(TokenType.BOOL_VARIANT, value="+foo")], "+foo"),
+        ("~foo", [Token(TokenType.BOOL_VARIANT, value="~foo")], "~foo"),
+        ("-foo", [Token(TokenType.BOOL_VARIANT, value="-foo")], "~foo"),
+        (
+            "platform=test",
+            [Token(TokenType.KEY_VALUE_PAIR, value="platform=test")],
+            "platform=test",
+        ),
+        # Multiple tokens specs
+        (
+            r"y~f+e~d+c~b+a",  # Variants are reordered
+            [
+                Token(TokenType.UNQUALIFIED_SPEC_NAME, value="y"),
+                Token(TokenType.BOOL_VARIANT, value="~f"),
+                Token(TokenType.BOOL_VARIANT, value="+e"),
+                Token(TokenType.BOOL_VARIANT, value="~d"),
+                Token(TokenType.BOOL_VARIANT, value="+c"),
+                Token(TokenType.BOOL_VARIANT, value="~b"),
+                Token(TokenType.BOOL_VARIANT, value="+a"),
+            ],
+            "y+a~b+c~d+e~f",
+        ),
+        (
+            r"os=fe",
+            [Token(TokenType.KEY_VALUE_PAIR, value="os=fe")],
+            "os=fe",
+        ),
+        # Ambiguous variant specification
+        (
+            r"_openmpi +debug-qt_4",  # Parse as a single bool variant
+            [
+                Token(TokenType.UNQUALIFIED_SPEC_NAME, value="_openmpi"),
+                Token(TokenType.BOOL_VARIANT, value="+debug-qt_4"),
+            ],
+            r"_openmpi+debug-qt_4",
+        ),
+        (
+            r"_openmpi +debug -qt_4",  # Parse as two variants
+            [
+                Token(TokenType.UNQUALIFIED_SPEC_NAME, value="_openmpi"),
+                Token(TokenType.BOOL_VARIANT, value="+debug"),
+                Token(TokenType.BOOL_VARIANT, value="-qt_4"),
+            ],
+            r"_openmpi+debug~qt_4",
+        ),
+        (
+            r"_openmpi +debug~qt_4",  # Parse as two variants
+            [
+                Token(TokenType.UNQUALIFIED_SPEC_NAME, value="_openmpi"),
+                Token(TokenType.BOOL_VARIANT, value="+debug"),
+                Token(TokenType.BOOL_VARIANT, value="~qt_4"),
+            ],
+            r"_openmpi+debug~qt_4",
+        ),
+        # One liner for values like 'a=b=c' that are injected
+        (
+            "cflags=a=b=c",
+            [Token(TokenType.KEY_VALUE_PAIR, value="cflags=a=b=c")],
+            "cflags='a=b=c'",
+        ),
+        (
+            "cflags=a=b=c+~",
+            [Token(TokenType.KEY_VALUE_PAIR, value="cflags=a=b=c+~")],
+            "cflags='a=b=c+~'",
+        ),
+        (
+            "cflags=-Wl,a,b,c",
+            [Token(TokenType.KEY_VALUE_PAIR, value="cflags=-Wl,a,b,c")],
+            "cflags=-Wl,a,b,c",
+        ),
+        # Multi quoted
+        (
+            'cflags="-O3 -g"',
+            [Token(TokenType.KEY_VALUE_PAIR, value='cflags="-O3 -g"')],
+            "cflags='-O3 -g'",
+        ),
+    ],
+)
+def test_parse_single_spec(spec_str, tokens, expected_roundtrip):
+    parser = SpecParser(Spec, spec_str)
+    assert tokens == parser.tokens()
+    assert expected_roundtrip == str(parser.next_spec())
+
+
+@pytest.mark.parametrize(
+    "text,tokens,expected_specs",
+    [
+        (
+            "mvapich emacs",
+            [
+                Token(TokenType.UNQUALIFIED_SPEC_NAME, value="mvapich"),
+                Token(TokenType.UNQUALIFIED_SPEC_NAME, value="emacs"),
+            ],
+            ["mvapich", "emacs"],
+        ),
+        (
+            "mvapich cppflags='-O3 -fPIC' emacs",
+            [
+                Token(TokenType.UNQUALIFIED_SPEC_NAME, value="mvapich"),
+                Token(TokenType.KEY_VALUE_PAIR, value="cppflags='-O3 -fPIC'"),
+                Token(TokenType.UNQUALIFIED_SPEC_NAME, value="emacs"),
+            ],
+            ["mvapich cppflags='-O3 -fPIC'", "emacs"],
+        ),
+        (
+            "mvapich cppflags=-O3 emacs",
+            [
+                Token(TokenType.UNQUALIFIED_SPEC_NAME, value="mvapich"),
+                Token(TokenType.KEY_VALUE_PAIR, value="cppflags=-O3"),
+                Token(TokenType.UNQUALIFIED_SPEC_NAME, value="emacs"),
+            ],
+            ["mvapich cppflags=-O3", "emacs"],
+        ),
+    ],
+)
+def test_parse_multiple_specs(text, tokens, expected_specs):
+    total_parser = SpecParser(Spec, text)
+    assert total_parser.tokens() == tokens
+
+    for single_spec_text in expected_specs:
+        single_spec_parser = SpecParser(Spec, single_spec_text)
+        assert str(total_parser.next_spec()) == str(single_spec_parser.next_spec())
+
+
+@pytest.mark.parametrize(
+    "args,expected",
+    [
+        # Test that CLI-quoted flags/variant values are preserved
+        (["zlib", "cflags=-O3 -g", "+bar", "baz"], "zlib+bar cflags='-O3 -g' baz"),
+        # Test that CLI-quoted propagated flags/variant values are preserved
+        (["zlib", "cflags==-O3 -g", "+bar", "baz"], "zlib+bar cflags='=-O3 -g' baz"),
+        # An entire string passed on the CLI with embedded quotes also works
+        (["zlib cflags='-O3 -g' +bar baz"], "zlib+bar cflags='-O3 -g' baz"),
+        # Entire string *without* quoted flags splits -O3/-g (-g interpreted as a variant)
+        (["zlib cflags=-O3 -g +bar baz"], "zlib+bar~g cflags=-O3 baz"),
+        # If the entirety of "-O3 -g +bar baz" is quoted on the CLI, it's all taken as flags
+        (["zlib", "cflags=-O3 -g +bar baz"], "zlib cflags='-O3 -g +bar baz'"),
+        # If the string doesn't start with key=, it needs internal quotes for flags
+        (["zlib", " cflags=-O3 -g +bar baz"], "zlib+bar~g cflags=-O3 baz"),
+        # Internal quotes for quoted CLI args are considered part of *one* arg
+        (["zlib", 'cflags="-O3 -g" +bar baz'], """zlib cflags='"-O3 -g" +bar baz'"""),
+        # Use double quotes if internal single quotes are present
+        (["zlib", "cflags='-O3 -g' +bar baz"], '''zlib cflags="'-O3 -g' +bar baz"'''),
+        # Use single quotes and escape single quotes with internal single and double quotes
+        (
+            ["zlib", "cflags='-O3 -g' \"+bar baz\""],
+            'zlib cflags="\'-O3 -g\' \\"+bar baz\\""',
+        ),
+        # Ensure that empty strings are handled correctly on CLI
+        (["zlib", "ldflags=", "+pic"], "zlib+pic ldflags=''"),
+        # These flags are assumed to be quoted by the shell, so the space
+        # becomes part of the value
+        (["zlib", "ldflags= +pic"], "zlib ldflags=' +pic'"),
+        (["ldflags= +pic"], "ldflags=' +pic'"),
+        # If the name is not a flag name, the space is preserved verbatim, because variant values
+        # are comma-separated.
+        (["zlib", "foo= +pic"], "zlib foo=' +pic'"),
+        (["foo= +pic"], "foo=' +pic'"),
+        # You can ensure no quotes are added parse_specs() by starting your string with space,
+        # but you still need to quote empty strings properly.
+        ([" ldflags= +pic"], SpecTokenizationError),
+        ([" ldflags=", "+pic"], SpecTokenizationError),
+        ([" ldflags='' +pic"], "+pic ldflags=''"),
+        ([" ldflags=''", "+pic"], "+pic ldflags=''"),
+        # Ensure that empty strings are handled properly in quoted strings
+        (["zlib ldflags='' +pic"], "zlib+pic ldflags=''"),
+        # Ensure that $ORIGIN is handled correctly
+        (
+            ["zlib", "ldflags=-Wl,-rpath=$ORIGIN/_libs"],
+            "zlib ldflags='-Wl,-rpath=$ORIGIN/_libs'",
+        ),
+        # Ensure that passing escaped quotes on the CLI raises a tokenization error
+        (["zlib", '"-g', '-O2"'], SpecTokenizationError),
+    ],
+)
+def test_cli_spec_roundtrip(args, expected):
+    if isinstance(expected, type) and issubclass(expected, BaseException):
+        with pytest.raises(expected):
+            _ = SpecParser(Spec, args).all_specs()
+        return
+
+    specs = SpecParser(Spec, args).all_specs()
+    output_string = " ".join(str(spec) for spec in specs)
+    for spec in specs:
+        print("FINAL", spec)
+    print(output_string)
+    print(expected)
+    assert output_string == expected
+
+
+@pytest.mark.parametrize(
+    "text,expected_in_error",
+    [
+        ("cflags=''-Wl,a,b,c''", r"cflags=''-Wl,a,b,c''\n            ^ ^ ^ ^^"),
+    ],
+)
+def test_error_reporting(text, expected_in_error):
+    parser = SpecParser(Spec, text)
+    with pytest.raises(SpecTokenizationError) as exc:
+        parser.tokens()
+
+    assert expected_in_error in str(exc), parser.tokens()
+
+
+@pytest.mark.parametrize(
+    "text,match_string",
+    [
+        # Duplicate variants
+        ("x+debug+debug", "variant"),
+        ("x+debug debug=true", "variant"),
+        ("x debug=false debug=true", "variant"),
+        ("x debug=false ~debug", "variant"),
+    ],
+)
+def test_error_conditions(text, match_string):
+    with pytest.raises(Exception, match=match_string):
+        SpecParser(Spec, text).next_spec()
diff --git a/lib/main.py b/lib/main.py
index 32a5c98db..f043e8eb2 100755
--- a/lib/main.py
+++ b/lib/main.py
@@ -6,6 +6,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import argparse
+import inspect
 import os
 import pathlib
 import shlex
@@ -16,12 +17,14 @@
 import benchpark.cmd.system
 import benchpark.cmd.experiment
 import benchpark.cmd.setup
+import benchpark.cmd.unit_test
+import benchpark.paths
 from benchpark.accounting import (
     benchpark_experiments,
     benchpark_modifiers,
     benchpark_systems,
 )
-from benchpark.paths import source_location
+
 
 __version__ = "0.1.0"
 
@@ -42,7 +45,7 @@ def main():
     benchpark_tags(subparsers, actions)
     init_commands(subparsers, actions)
 
-    args = parser.parse_args()
+    args, unknown_args = parser.parse_known_args()
     no_args = True if len(sys.argv) == 1 else False
 
     if no_args:
@@ -54,7 +57,15 @@ def main():
         return 0
 
     if args.subcommand in actions:
-        actions[args.subcommand](args)
+        action = actions[args.subcommand]
+        if supports_unknown_args(action):
+            action(args, unknown_args)
+        elif unknown_args:
+            raise argparse.ArgumentTypeError(
+                f"benchpark {args.subcommand} has no option(s) {unknown_args}"
+            )
+        else:
+            action(args)
     else:
         print(
             "Invalid subcommand ({args.subcommand}) - must choose one of: "
@@ -62,6 +73,19 @@ def main():
         )
 
 
+def supports_unknown_args(command):
+    """Implements really simple argument injection for unknown arguments.
+
+    Commands may add an optional argument called "unknown args" to
+    indicate they can handle unknown args, and we'll pass the unknown
+    args in.
+    """
+    info = dict(inspect.getmembers(command))
+    varnames = info["__code__"].co_varnames
+    argcount = info["__code__"].co_argcount
+    return argcount == 2 and varnames[1] == "unknown_args"
+
+
 def get_version():
     benchpark_version = __version__
     return benchpark_version
@@ -76,7 +100,7 @@ def benchpark_list(subparsers, actions_dict):
 
 
 def benchpark_benchmarks():
-    source_dir = source_location()
+    source_dir = benchpark.paths.benchpark_root
     benchmarks = []
     experiments_dir = source_dir / "experiments"
     for x in os.listdir(experiments_dir):
@@ -85,7 +109,7 @@ def benchpark_benchmarks():
 
 
 def benchpark_get_tags():
-    f = source_location() / "tags.yaml"
+    f = benchpark.paths.benchpark_root / "tags.yaml"
     tags = []
 
     with open(f, "r") as stream:
@@ -183,9 +207,15 @@ def init_commands(subparsers, actions_dict):
     )
     benchpark.cmd.setup.setup_parser(setup_parser)
 
+    unit_test_parser = subparsers.add_parser(
+        "unit-test", help="Run benchpark unit tests"
+    )
+    benchpark.cmd.unit_test.setup_parser(unit_test_parser)
+
     actions_dict["system"] = benchpark.cmd.system.command
     actions_dict["experiment"] = benchpark.cmd.experiment.command
     actions_dict["setup"] = benchpark.cmd.setup.command
+    actions_dict["unit-test"] = benchpark.cmd.unit_test.command
 
 
 def run_command(command_str, env=None):
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 000000000..a6f9e83c5
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,4 @@
+# content of pytest.ini
+[pytest]
+testpaths = lib/benchpark/test
+python_files = *.py
diff --git a/spack.yaml b/spack.yaml
new file mode 100644
index 000000000..2f70ec71f
--- /dev/null
+++ b/spack.yaml
@@ -0,0 +1,13 @@
+# This is a Spack Environment file.
+#
+# It describes a set of packages to be installed, along with
+# configuration settings.
+spack:
+  # add package specs to the `specs` list
+  specs:
+  - py-pyyaml
+  - py-pytest
+  - py-codespell
+  view: true
+  concretizer:
+    unify: true