Skip to content

Commit

Permalink
Feature/experiment scaling (#375)
Browse files Browse the repository at this point in the history
* Add initial framework for experiment init

* saxpy: fix mismatch between variable name and usage

* cmd/experiment: fix interface to Experiment class

* cmd/experiment: implement benchpark experiment list

* cmd/experiment: remove vestigial references to 'system'

* spec: fix bug with ConcreteSpec.satisfies and ConcreteSpec.intersects

* spec: remove vestigial 'autospec' decorator

* Implementation of strong scaling

* Weak scaling implementation

* Scaling implementation

* kripke scaling experiment

* Fix lint formatting

* Fix expr name

* Fix lint formatting

* Fix variable name

* amg2023 scaling implementation

* Fix lint formatting

* Move scaling to lib/benchpark/experiment.py

* Update method documentation

* Fix lint formatting

* Fix lint formatting

* Separating scaling and programming models in AMG experiment

* formatting

* Simplifying strings

* Simplifying strings

* lint

* lint

* lint

* Adding single node experiment

* Update experiment.py

* Fix variable name

---------

Co-authored-by: Alec Scott <[email protected]>
Co-authored-by: Gregory Becker <[email protected]>
Co-authored-by: Riyaz Haque <[email protected]>
Co-authored-by: pearce8 <[email protected]>
  • Loading branch information
5 people authored Oct 25, 2024
1 parent 7017d60 commit 9a35443
Show file tree
Hide file tree
Showing 3 changed files with 342 additions and 109 deletions.
144 changes: 144 additions & 0 deletions lib/benchpark/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import yaml # TODO: some way to ensure yaml available

from benchpark.directives import ExperimentSystemBase
from benchpark.directives import variant
import benchpark.spec
import benchpark.paths
import benchpark.repo
Expand Down Expand Up @@ -48,10 +49,153 @@ class Experiment(ExperimentSystemBase):
Dict[str, benchpark.variant.Variant],
]

# Scaling variants shared by every Experiment subclass: together they
# control how many scaled experiments are generated and how quickly the
# problem/process variables grow between successive experiments.
# NOTE: defaults are strings (per the variant directive convention) and are
# converted with int(...) by callers; `values=int` validates the type.
variant(
    "scaling-factor",
    default="2",
    values=int,
    description="Factor by which to scale values of problem variables",
)

# Number of experiments generated by scale_experiment_variables (the first
# experiment uses the unscaled initial values).
variant(
    "scaling-iterations",
    default="4",
    values=int,
    description="Number of experiments to be generated",
)

def __init__(self, spec):
    """Bind the concrete spec describing this experiment instance.

    Args:
        spec: a fully concretized experiment spec; stored before the
            base-class initializer runs so directives can consult it.
    """
    self.spec: "benchpark.spec.ConcreteExperimentSpec" = spec
    super().__init__()

# input parameters:
# 1. input_variables: dictionary with key value pairs of type str: int or tuple(str): list(int)
# For the value in input_variables corresponding to scaling_variable,
# if the value is a list, select the index of its smallest element, 0 otherwise
# Beginning with this index, generate a list of indexes of length equal to
# the number of dimensions in an (ascending) round-robin order
# 2. scaling_variable: variable of type str or tuple(str). The scaling order is determined by
# the value in input_variables corresponding to scaling_variable.
#
# output:
# scaling_order: list[int]. list of indices, with one value for each dimension,
# starting with the minimum value of the first element in input_variables arranged
# in an ascending round-robin order
def configure_scaling_policy(self, input_variables, scaling_variable):
    """Compute the round-robin dimension ordering for a scaling study.

    The number of dimensions is taken from the first list-valued entry of
    ``input_variables`` (1 if every value is scalar).  The ordering starts
    at the index of the smallest element of the value associated with
    ``scaling_variable`` (index 0 for a scalar value) and cycles through
    the remaining dimensions in ascending round-robin order.

    Returns:
        list[int]: one index per dimension, e.g. ``[1, 2, 0]``.
    """
    # Dimensionality = length of the first list-valued variable, if any.
    dims = next(
        (len(value) for value in input_variables.values() if isinstance(value, list)),
        1,
    )

    # Anchor the rotation at the smallest entry of the scaling variable.
    anchor = input_variables[scaling_variable]
    start = anchor.index(min(anchor)) if isinstance(anchor, list) else 0

    return [(start + step) % dims for step in range(dims)]

# input parameters:
# 1. input_variables: dict[str | tuple(str), int | list[int]]. Dictionary of all variables
# that need to be scaled. All variables are ordered as per the ordering policy of
# the first element in input_variables. By default, this policy is to scale the
# values beginning with the smallest dimension and proceeding in a RR manner through
# the other dimensions
#
# 2. scaling_factor: int. Factor by which to scale the variables. All entries in
# input_variables are scaled by the same factor
#
# 3. num_exprs: int. Number of experiments to be generated
#
# 4. scaling_variable: variable of type str or tuple(str). The scaling order is determined by
# the value in input_variables corresponding to scaling_variable. If no scaling_variable is
# specified, the scaling order is defined using the first element in input_variables
#
# output:
# output_variables: dict[str, int | list[int]]. num_exprs values for each
# dimension of the input variable scaled by the scaling_factor according to the
# scaling policy
def scale_experiment_variables(
    self, input_variables, scaling_factor, num_exprs, scaling_variable=None
):
    """Generate ``num_exprs`` values for each experiment variable.

    Starting from the initial values in ``input_variables``, each
    subsequent experiment multiplies one dimension by ``scaling_factor``,
    cycling through dimensions in the order computed by
    ``configure_scaling_policy``.  Scalar (str-keyed) variables are scaled
    on every iteration.

    Args:
        input_variables: dict whose pairs are either ``str -> int`` (a
            scalar variable) or ``tuple(str) -> list(int)`` (one name and
            initial value per dimension).  All list values must share the
            same length.
        scaling_factor: int multiplier applied when a dimension is scaled.
        num_exprs: int number of experiments (values per variable).
        scaling_variable: key of ``input_variables`` whose value fixes the
            dimension ordering; defaults to the first key.

    Returns:
        dict[str, int | list[int]]: per variable name, a single value
        (when only one experiment is generated) or ``num_exprs`` values.

    Raises:
        RuntimeError: if ``scaling_variable`` is not a key of
            ``input_variables`` or a pair violates the typing/shape rules.
    """
    # Nothing to scale.
    if not input_variables:
        return {}

    # If undefined, order by the first entry of input_variables.
    if not scaling_variable:
        scaling_variable = next(iter(input_variables))

    if scaling_variable not in input_variables:
        raise RuntimeError("Invalid ordering variable")

    # Validate that:
    # 1. pairs are of type str->int or tuple(str)->list(int)
    # 2. the length of a tuple key equals the length of its list value
    # 3. all list values share one length (the number of dimensions)
    n_dims = None
    for k, v in input_variables.items():
        if isinstance(k, str):
            if not isinstance(v, int):
                raise RuntimeError("Invalid key-value pair. Expected type str->int")
        elif isinstance(k, tuple) and all(isinstance(s, str) for s in k):
            if isinstance(v, list) and all(isinstance(i, int) for i in v):
                if len(k) != len(v):
                    # f-string (was a plain string, so {k}/{v} never rendered)
                    raise RuntimeError(
                        f"Invalid value. Length of key {k} does not match the length of value {v}"
                    )
                else:
                    # `is None` (not truthiness): a legitimate n_dims of 0
                    # must not re-trigger initialization.
                    if n_dims is None:
                        n_dims = len(v)
                    if len(v) != n_dims:
                        raise RuntimeError(
                            "Variables to be scaled have different dimensions"
                        )
            else:
                raise RuntimeError(
                    "Invalid key-value pair. Expected type tuple(str)->list[int]"
                )
        else:
            raise RuntimeError("Invalid key. Expected type str or tuple(str)")

    # Dimension visitation order, anchored at scaling_variable.
    scaling_order_index = self.configure_scaling_policy(
        input_variables, scaling_variable
    )

    # Seed each dimension's value history with its initial value.
    scaled_variables = {}
    for key, val in input_variables.items():
        scaled_variables[key] = (
            [[v] for v in val] if isinstance(val, list) else [[val]]
        )

    # Take the initial parameterized vector; for each experiment after the
    # first, scale one dimension of that vector by the scaling factor,
    # cycling through the dimensions in round-robin fashion.
    for exp_num in range(num_exprs - 1):
        for param in scaled_variables.values():
            if len(param) == 1:
                param[0].append(param[0][-1] * scaling_factor)
            else:
                for p_idx, p_val in enumerate(param):
                    p_val.append(
                        p_val[-1] * scaling_factor
                        if p_idx
                        == scaling_order_index[exp_num % len(scaling_order_index)]
                        else p_val[-1]
                    )

    # Flatten tuple keys back to one entry per variable name; collapse
    # single-experiment histories to a scalar value.
    output_variables = {}
    for k, v in scaled_variables.items():
        if isinstance(k, tuple):
            for i in range(len(k)):
                output_variables[k[i]] = v[i] if len(v[i]) > 1 else v[i][0]
        else:
            output_variables[k] = v[0] if len(v[0]) > 1 else v[0][0]
    return output_variables

def compute_include_section(self):
# include the config directory
# TODO: does this need to change to interop with System class
Expand Down
102 changes: 85 additions & 17 deletions var/exp_repo/experiments/amg2023/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@ class Amg2023(Caliper, Experiment):
variant(
"workload",
default="problem1",
values=("problem1", "problem2"),
description="problem1 or problem2",
)

variant(
"experiment",
default="example",
values=("strong", "weak", "example"),
description="type of experiment",
default="single-node",
values=("strong", "weak", "example", "single-node", "throughput"),
description="strong scaling, weak scaling, single-node, throughput study or an example",
)

def make_experiment_example(self):
Expand Down Expand Up @@ -64,13 +65,8 @@ def make_experiment_example(self):
variables["nz"] = n
zips["size"] = ["nx", "ny", "nz"]

m_tag = (
"matrices" if self.spec.satisfies("programming_model=openmp") else "matrix"
)
if self.spec.satisfies("programming_model=openmp"):
matrices.append(
{"size_nodes_threads": ["size", "n_nodes", "n_threads_per_proc"]}
)
matrices.extend(["size", "n_nodes", "n_threads_per_proc"])
elif self.spec.satisfies("programming_model=cuda") or self.spec.satisfies(
"programming_model=rocm"
):
Expand All @@ -94,7 +90,7 @@ def make_experiment_example(self):
"variables": variables,
"zips": zips,
"exclude": excludes,
m_tag: matrices,
"matrix": matrices,
}
}
}
Expand All @@ -115,15 +111,87 @@ def compute_applications_section(self):

if self.spec.satisfies("experiment=example"):
return self.make_experiment_example()
elif self.spec.satisfies("experiment=strong"):
return self.make_experiment_strong()
elif self.spec.satisfies("experiment=weak"):
return self.make_experiment_weak()
else:
raise NotImplementedError(
"Unsupported experiment. Only strong, weak and example experiments are supported"

px = "px"
py = "py"
pz = "pz"
nx = "nx"
ny = "ny"
nz = "nz"
num_procs = "{px} * {py} * {pz}"

variables = {}
variables["n_ranks"] = num_procs

if self.spec.satisfies("programming_model=openmp"):
variables["n_ranks"] = num_procs
variables["n_threads_per_proc"] = 1
n_resources = "{n_ranks}_{n_threads_per_proc}"
elif self.spec.satisfies("programming_model=cuda"):
variables["n_gpus"] = num_procs
n_resources = "{n_gpus}"
elif self.spec.satisfies("programming_model=rocm"):
variables["n_gpus"] = num_procs
n_resources = "{n_gpus}"

experiment_name = f"amg2023_{self.spec.variants['programming_model'][0]}_{self.spec.variants['experiment'][0]}_{self.workload}_{{n_nodes}}_{n_resources}_{{{px}}}_{{{py}}}_{{{pz}}}_{{{nx}}}_{{{ny}}}_{{{nz}}}"

experiment_setup = {}
experiment_setup["variants"] = {"package_manager": "spack"}

# Number of processes in each dimension
initial_p = [2, 2, 2]

# Per-process size (in zones) in each dimension
initial_n = [80, 80, 80]

if self.spec.satisfies("experiment=single-node"):
variables[px] = initial_p[0]
variables[py] = initial_p[1]
variables[pz] = initial_p[2]
variables[nx] = initial_n[0]
variables[ny] = initial_n[1]
variables[nz] = initial_n[2]
else: # A scaling study
input_params = {}
if self.spec.satisfies("experiment=throughput"):
variables[px] = initial_p[0]
variables[py] = initial_p[1]
variables[pz] = initial_p[2]
scaling_variable = (nx, ny, nz)
input_params[scaling_variable] = initial_n
elif self.spec.satisfies("experiment=strong"):
scaling_variable = (px, py, pz)
input_params[scaling_variable] = initial_p
variables[nx] = initial_n[0]
variables[ny] = initial_n[1]
variables[nz] = initial_n[2]
elif self.spec.satisfies("experiment=weak"):
scaling_variable = (px, py, pz)
input_params[scaling_variable] = initial_p
input_params[(nx, ny, nz)] = initial_n
variables |= self.scale_experiment_variables(
input_params,
int(self.spec.variants["scaling-factor"][0]),
int(self.spec.variants["scaling-iterations"][0]),
scaling_variable,
)

# TODO: Add explanation
experiment_setup["variables"] = variables

return {
self.spec.name: {
"workloads": {
self.workload: {
"experiments": {
experiment_name: experiment_setup,
}
}
}
}
}

def compute_spack_section(self):
app_name = self.spec.name

Expand Down
Loading

0 comments on commit 9a35443

Please sign in to comment.