Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Caliperizing stream #155

Merged
merged 14 commits into from
Jun 8, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ spack:
spack_spec: clang@=14.0.6
mpi-clang:
spack_spec: [email protected]
mpi-gcc:
spack_spec: [email protected]
blas:
spack_spec: [email protected]
cublas-cuda:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,6 @@ packages:
prefix: /usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-clang-16.0.6-cuda-11.8.0-gcc-11.2.1
extra_attributes:
ldflags: "-lmpiprofilesupport -lmpi_ibm_usempi -lmpi_ibm_mpifh -lmpi_ibm"
- spec: [email protected]
prefix: /usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-gcc-8.3.1
buildable: false
2 changes: 2 additions & 0 deletions configs/LLNL-Sierra-IBM-power9-V100-Infiniband/spack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ spack:
spack_spec: [email protected]{default_cuda_version}
mpi-clang:
spack_spec: [email protected]{default_cuda_version}
mpi-gcc:
spack_spec: [email protected]
compiler-clang-ibm:
spack_spec: [email protected]{default_cuda_version}-gcc-11.2.1
mpi-clang-ibm:
Expand Down
4 changes: 3 additions & 1 deletion configs/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/spack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ spack:
default-compiler:
spack_spec: cce@16
default-mpi:
spack_spec: [email protected]%cce ~gtl
spack_spec: [email protected].26%cce@16.0.0 ~gtl
compiler-rocm:
spack_spec: cce@16
compiler-amdclang:
spack_spec: [email protected]
compiler-gcc:
spack_spec: [email protected]
blas-rocm:
spack_spec: [email protected]
blas:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ ramble:
include:
- ./configs/spack.yaml
- ./configs/variables.yaml
- ./configs/modifier.yaml

config:
deprecated: true
Expand All @@ -15,34 +16,40 @@ ramble:
concretize: '-U -f'

variables:
n_times: ['20', '35']
array_size: ['80000000', '1280000000']
n: ['35', '35', '35', '35']
s: ['32000000', '64000000', '128000000', '256000000']
o: ['0', '0', '0', '0']

modifiers:
- name: allocation

applications:
streamc:
stream:
workloads:
streamc:
stream:
env_vars:
set:
OMP_NUM_THREADS: '{n_threads_per_proc}'
variables:
n_ranks: '1'
experiments:
stream_{array_size}_{n_times}_{n_threads_per_proc}:
stream_{s}_{o}_{n}_{n_threads_per_proc}:
variables:
env_name: 'stream_{array_size}_{n_times}'
n_ranks_per_node: '1'
env_name: 'stream'
processes_per_node: '1'
n_nodes: '1'
n_threads_per_proc: ['8', '16', '32']
n_threads_per_proc: ['16', '32']
matrix:
- n_threads_per_proc

spack:
packages:
stream_{array_size}_{n_times}:
spack_spec: '[email protected] +openmp stream_array_size={array_size} ntimes={n_times} cflags="-mcmodel=medium -Ofast -flto"'
stream:
spack_spec: [email protected]{modifier_spack_variant}
compiler: default-compiler
environments:
stream_{array_size}_{n_times}:
stream:
packages:
- stream_{array_size}_{n_times}
- default-mpi
- stream
- '{modifier_package_name}'
2 changes: 1 addition & 1 deletion modifiers/allocation/modifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ def flux_instructions(self, v):
cmd_opts.append(f"--gpus-per-task={gpus_per_rank}")

if v.timeout:
batch_opts.append("-t {v.timeout}m")
batch_opts.append(f"-t {v.timeout}m")

batch_directives = list(f"# flux: {x}" for x in (cmd_opts + batch_opts))

Expand Down
102 changes: 102 additions & 0 deletions repo/stream/application.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

import os
from ramble.appkit import *
from ramble.expander import Expander


class Stream(SpackApplication):
    '''Ramble application definition for the STREAM benchmark.

    Declares the Spack software spec, the single ``stream`` workload with
    its runtime variables (``n``, ``s``, ``o``), the success criterion, and
    the figures of merit that are parsed out of the experiment log file.
    '''
    name = 'stream'

    maintainers('dodecatheon')

    tags('memorybenchmark', 'microbenchmark', 'memory-benchmark', 'micro-benchmark')

    # NOTE(review): the version in this spec was restored from a redacted
    # "name@version" token; 5.10 is assumed -- confirm against upstream.
    software_spec('stream',
                  spack_spec='[email protected] +openmp cflags="-O3 -DSTREAM_ARRAY_SIZE=80000000 -DNTIMES=20"',
                  compiler='gcc12')

    required_package('stream')

    # Iteration count, array size and offset are passed on the command line
    # through the workload variables declared below.
    executable('execute', 'stream -n {n} -s {s} -o {o}', use_mpi=True)

    workload('stream', executable='execute')

    workload_variable('n', default='10', description='NTIMES', workloads=['stream'])
    workload_variable('s', default='10000000', description='STREAM_ARRAY_SIZE', workloads=['stream'])
    workload_variable('o', default='0', description='OFFSET', workloads=['stream'])

    # Every success criterion and figure of merit below reads this file:
    # "<experiment_run_dir>/<experiment_name>.out", expressed with Ramble
    # expansion placeholders.
    log_file = os.path.join(Expander.expansion_str('experiment_run_dir'),
                            Expander.expansion_str('experiment_name') + '.out')

    success_criteria('valid', mode='string',
                     match=r'Solution Validates: avg error less than 1.000000e-13 on all three arrays',
                     file=log_file)

    figure_of_merit("Array size",
                    log_file=log_file,
                    fom_regex=r'Array size\s+\=\s+(?P<array_size>[0-9]+)',
                    group_name='array_size',
                    units='elements')

    # The fractional digits sit outside the named group, so only the integer
    # part of the value is captured.
    figure_of_merit("Array memory",
                    log_file=log_file,
                    fom_regex=r'Memory per array\s+\=\s+(?P<array_mem>[0-9]+)\.*[0-9]*',
                    group_name='array_mem',
                    units='MiB')

    figure_of_merit("Total memory",
                    log_file=log_file,
                    fom_regex=r'Total memory required\s+\=\s+(?P<total_mem>[0-9]+\.*[0-9]*)',
                    group_name='total_mem',
                    units='MiB')

    figure_of_merit("Number of iterations per thread",
                    log_file=log_file,
                    fom_regex=r'Each kernel will be executed\s+(?P<n_times>[0-9]+)',
                    group_name='n_times',
                    units='')

    figure_of_merit("Number of threads",
                    log_file=log_file,
                    fom_regex=r'Number of Threads counted\s+\=\s+(?P<n_threads>[0-9]+\.*[0-9]*)',
                    group_name='n_threads',
                    units='')

    # Each kernel gets one regex matching "<Name>:" followed by four
    # decimal columns; the same regex is registered four times, once per
    # named group.
    for opName in ['Copy', 'Scale', 'Add', 'Triad']:

        opname = opName.lower()

        opregex = (rf'^{opName}:'
                   rf'\s+(?P<{opname}_top_rate>[0-9]+\.[0-9]*)'
                   rf'\s+(?P<{opname}_avg_time>[0-9]+\.[0-9]*)'
                   rf'\s+(?P<{opname}_min_time>[0-9]+\.[0-9]*)'
                   rf'\s+(?P<{opname}_max_time>[0-9]+\.[0-9]*)')

        figure_of_merit(opName + ' top rate',
                        log_file=log_file,
                        fom_regex=opregex,
                        group_name=(opname + '_top_rate'),
                        units='MB/s')

        figure_of_merit(opName + ' average time',
                        log_file=log_file,
                        fom_regex=opregex,
                        group_name=(opname + '_avg_time'),
                        units='s')

        figure_of_merit(opName + ' min time',
                        log_file=log_file,
                        fom_regex=opregex,
                        group_name=(opname + '_min_time'),
                        units='s')

        figure_of_merit(opName + ' max time',
                        log_file=log_file,
                        fom_regex=opregex,
                        group_name=(opname + '_max_time'),
                        units='s')
35 changes: 35 additions & 0 deletions repo/stream/package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

from spack.package import *

class Stream(CMakePackage):
    """The STREAM benchmark is a simple synthetic benchmark program that
    measures sustainable memory bandwidth (in MB/s) and the corresponding
    computation rate for simple vector kernels.

    This package builds a fork of the official code with Caliper support,
    a CMake build system, and the ability to configure settings
    (array size, iterations, offset) at runtime via the command line."""

    homepage = "https://www.cs.virginia.edu/stream/ref.html"
    git = "https://github.com/daboehme/STREAM.git"

    # The only declared version tracks the fork's "caliper-benchpark" branch.
    version("5.10-caliper", git="https://github.com/daboehme/STREAM.git",
            branch="caliper-benchpark")

    variant("caliper", default=False, description="Enable Caliper/Adiak support")

    requires("@5.10-caliper", when="+caliper")

    depends_on("caliper", when="+caliper")
    # NOTE(review): the lower bound here was restored from a redacted
    # "name@version" token; 0.4.0 is assumed -- confirm against upstream.
    depends_on("[email protected]:", when="+caliper")

    def cmake_args(self):
        """Return the CMake arguments for this build.

        STREAM_ENABLE_CALIPER is defined from the ``caliper`` variant.
        """
        args = [
            self.define_from_variant("STREAM_ENABLE_CALIPER", "caliper"),
        ]

        return args