LLNL · pearce8 · Jun 8, 2024 · Mar 1, 2024 · Mar 1, 2024 · Mar 1, 2024
diff --git a/configs/LLNL-Pascal-Penguin-broadwell-P100-OmniPath/spack.yaml b/configs/LLNL-Pascal-Penguin-broadwell-P100-OmniPath/spack.yaml
@@ -15,6 +15,8 @@ spack:
       spack_spec: clang@=14.0.6
     mpi-clang:
       spack_spec: [email protected]
+    mpi-gcc:
+      spack_spec: [email protected]
     blas:
       spack_spec: [email protected]
     cublas-cuda:

diff --git a/configs/LLNL-Sierra-IBM-power9-V100-Infiniband/auxiliary_software_files/packages.yaml b/configs/LLNL-Sierra-IBM-power9-V100-Infiniband/auxiliary_software_files/packages.yaml
@@ -97,4 +97,6 @@ packages:
       prefix: /usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-clang-16.0.6-cuda-11.8.0-gcc-11.2.1
       extra_attributes:
         ldflags: "-lmpiprofilesupport -lmpi_ibm_usempi -lmpi_ibm_mpifh -lmpi_ibm"
+    - spec: [email protected]
+      prefix: /usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-gcc-8.3.1
     buildable: false
diff --git a/configs/LLNL-Sierra-IBM-power9-V100-Infiniband/spack.yaml b/configs/LLNL-Sierra-IBM-power9-V100-Infiniband/spack.yaml
@@ -19,6 +19,8 @@ spack:
       spack_spec: [email protected]{default_cuda_version}
     mpi-clang:
       spack_spec: [email protected]{default_cuda_version}
+    mpi-gcc:
+      spack_spec: [email protected]
     compiler-clang-ibm:
       spack_spec: [email protected]{default_cuda_version}-gcc-11.2.1
     mpi-clang-ibm:

diff --git a/configs/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/spack.yaml b/configs/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/spack.yaml
@@ -8,11 +8,13 @@ spack:
     default-compiler:
       spack_spec: cce@16
     default-mpi:
-      spack_spec: [email protected]%cce ~gtl
+      spack_spec: cray-mpich@8.1.26%cce@16.0.0 ~gtl
     compiler-rocm:
       spack_spec: cce@16
     compiler-amdclang:
       spack_spec: [email protected]
+    compiler-gcc:
+      spack_spec: [email protected]
     blas-rocm:
       spack_spec: [email protected]
     blas:

diff --git a/experiments/streamc/openmp/ramble.yaml → experiments/stream/openmp/ramble.yaml b/experiments/streamc/openmp/ramble.yaml → experiments/stream/openmp/ramble.yaml
@@ -7,6 +7,7 @@ ramble:
   include:
   - ./configs/spack.yaml
   - ./configs/variables.yaml
+  - ./configs/modifier.yaml
 
   config:
     deprecated: true
@@ -15,34 +16,40 @@ ramble:
       concretize: '-U -f'
 
   variables:
-    n_times: ['20', '35']
-    array_size: ['80000000', '1280000000']
+    n: ['35', '35', '35', '35']
+    s: ['32000000', '64000000', '128000000', '256000000']
+    o: ['0', '0', '0', '0']
 
   modifiers:
   - name: allocation
 
   applications:
-    streamc:
+    stream:
       workloads:
-        streamc:
+        stream:
+          env_vars:
+            set:
+              OMP_NUM_THREADS: '{n_threads_per_proc}'
           variables:
             n_ranks: '1'
           experiments:
-            stream_{array_size}_{n_times}_{n_threads_per_proc}:
+            stream_{s}_{o}_{n}_{n_threads_per_proc}:
               variables:
-                env_name: 'stream_{array_size}_{n_times}'
-                n_ranks_per_node: '1'
+                env_name: 'stream'
+                processes_per_node: '1'
                 n_nodes: '1'
-                n_threads_per_proc: ['8', '16', '32']
+                n_threads_per_proc: ['16', '32']
               matrix:
                 - n_threads_per_proc
 
   spack:
     packages:
-      stream_{array_size}_{n_times}:
-        spack_spec: 'stream@5.10 +openmp stream_array_size={array_size} ntimes={n_times} cflags="-mcmodel=medium -Ofast -flto"'
+      stream:
+        spack_spec: stream@5.10-caliper{modifier_spack_variant}
         compiler: default-compiler
     environments:
-      stream_{array_size}_{n_times}:
+      stream:
         packages:
-        - stream_{array_size}_{n_times}
+        - default-mpi
+        - stream
+        - '{modifier_package_name}'
diff --git a/modifiers/allocation/modifier.py b/modifiers/allocation/modifier.py
@@ -344,7 +344,7 @@ def flux_instructions(self, v):
             cmd_opts.append(f"--gpus-per-task={gpus_per_rank}")
 
         if v.timeout:
-            batch_opts.append("-t {v.timeout}m")
+            batch_opts.append(f"-t {v.timeout}m")
 
         batch_directives = list(f"# flux: {x}" for x in (cmd_opts + batch_opts))
 

diff --git a/repo/stream/application.py b/repo/stream/application.py
@@ -0,0 +1,102 @@
+# Copyright 2023 Lawrence Livermore National Security, LLC and other
+# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+from ramble.appkit import *
+from ramble.expander import Expander
+
+
+class Stream(SpackApplication):
+    '''Define STREAM application'''
+    name = 'stream'
+
+    maintainers('dodecatheon')
+
+    tags('memorybenchmark', 'microbenchmark', 'memory-benchmark', 'micro-benchmark')
+
+    software_spec('stream',
+                  spack_spec='[email protected] +openmp cflags="-O3 -DSTREAM_ARRAY_SIZE=80000000 -DNTIMES=20"',
+                  compiler='gcc12')
+
+    required_package('stream')
+
+    executable('execute', 'stream -n {n} -s {s} -o {o}', use_mpi=True)
+
+    workload('stream', executable='execute')
+
+    workload_variable('n', default='10', description='NTIMES', workloads=['stream'])
+    workload_variable('s', default='10000000', description='STREAM_ARRAY_SIZE', workloads=['stream'])
+    workload_variable('o', default='0', description='OFFSET', workloads=['stream'])
+
+    log_file = os.path.join(Expander.expansion_str('experiment_run_dir'),
+                            Expander.expansion_str('experiment_name') + '.out')
+
+    success_criteria('valid', mode='string',
+                     match=r'Solution Validates: avg error less than 1.000000e-13 on all three arrays',
+                     file=log_file)
+
+    figure_of_merit("Array size",
+                    log_file=log_file,
+                    fom_regex=r'Array size\s+\=\s+(?P<array_size>[0-9]+)',
+                    group_name='array_size',
+                    units='elements')
+
+    figure_of_merit("Array memory",
+                    log_file=log_file,
+                    fom_regex=r'Memory per array\s+\=\s+(?P<array_mem>[0-9]+)\.*[0-9]*',
+                    group_name='array_mem',
+                    units='MiB')
+
+    figure_of_merit("Total memory",
+                    log_file=log_file,
+                    fom_regex=r'Total memory required\s+\=\s+(?P<total_mem>[0-9]+\.*[0-9]*)',
+                    group_name='total_mem',
+                    units='MiB')
+
+    figure_of_merit("Number of iterations per thread",
+                    log_file=log_file,
+                    fom_regex=r'Each kernel will be executed\s+(?P<n_times>[0-9]+)',
+                    group_name='n_times',
+                    units='')
+
+    figure_of_merit("Number of threads",
+                    log_file=log_file,
+                    fom_regex=r'Number of Threads counted\s+\=\s+(?P<n_threads>[0-9]+\.*[0-9]*)',
+                    group_name='n_threads',
+                    units='')
+
+    for opName in ['Copy', 'Scale', 'Add', 'Triad']:
+
+        opname = opName.lower()
+
+        opregex = (r'^' + opName + r':' +
+                   r'\s+(?P<' + opname + r'_top_rate>[0-9]+\.[0-9]*)' +
+                   r'\s+(?P<' + opname + r'_avg_time>[0-9]+\.[0-9]*)' +
+                   r'\s+(?P<' + opname + r'_min_time>[0-9]+\.[0-9]*)' +
+                   r'\s+(?P<' + opname + r'_max_time>[0-9]+\.[0-9]*)')
+
+        figure_of_merit(opName + ' top rate',
+                        log_file=log_file,
+                        fom_regex=opregex,
+                        group_name=(opname + '_top_rate'),
+                        units='MB/s')
+
+        figure_of_merit(opName + ' average time',
+                        log_file=log_file,
+                        fom_regex=opregex,
+                        group_name=(opname + '_avg_time'),
+                        units='s')
+
+        figure_of_merit(opName + ' min time',
+                        log_file=log_file,
+                        fom_regex=opregex,
+                        group_name=(opname + '_min_time'),
+                        units='s')
+
+        figure_of_merit(opName + ' max time',
+                        log_file=log_file,
+                        fom_regex=opregex,
+                        group_name=(opname + '_max_time'),
+                        units='s')
diff --git a/repo/stream/package.py b/repo/stream/package.py
@@ -0,0 +1,35 @@
+# Copyright 2023 Lawrence Livermore National Security, LLC and other
+# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from spack.package import *
+
+class Stream(CMakePackage):
+    """The STREAM benchmark is a simple synthetic benchmark program that
+    measures sustainable memory bandwidth (in MB/s) and the corresponding
+    computation rate for simple vector kernels.
+
+    This package builds a fork of the official code with Caliper support, 
+    a CMake build system, and the ability to configure settings 
+    (array size, iterations, offset) at runtime via the command line."""
+
+    homepage = "https://www.cs.virginia.edu/stream/ref.html"
+    git = "https://github.com/daboehme/STREAM.git"
+
+    version("5.10-caliper", git="https://github.com/daboehme/STREAM.git",
+            branch="caliper-benchpark")
+
+    variant("caliper", default=False, description="Enable Caliper/Adiak support")
+
+    requires("@5.10-caliper", when="+caliper")
+
+    depends_on("caliper", when="+caliper")
+    depends_on("[email protected]:", when="+caliper")
+
+    def cmake_args(self):
+        args = [ 
+            self.define_from_variant("STREAM_ENABLE_CALIPER", "caliper") 
+        ]
+
+        return args