From 46637496b5bd3ae0341f91e75bb31e1e2e9fb4f8 Mon Sep 17 00:00:00 2001 From: Dewi <43684384+dyokelson@users.noreply.github.com> Date: Tue, 3 Dec 2024 08:04:31 -0800 Subject: [PATCH 1/5] Qws experiment class (#387) * qws: rewrite as experiment object * qws: default to mpi instead of openmp * qws: fix style * Default openmp (that's the only version that exists) * Adding dry run * a few fixes, but not working yet, still a compiler error on dane * qws: rewrite as experiment object * qws: default to mpi instead of openmp * qws: fix style * Default openmp (that's the only version that exists) * Adding dry run * a few fixes, but not working yet, still a compiler error on dane * qws: rewrite as experiment object * qws: default to mpi instead of openmp * qws: fix style * Default openmp (that's the only version that exists) * Adding dry run * a few fixes, but not working yet, still a compiler error on dane * qws: rewrite as experiment object * qws: default to mpi instead of openmp * qws: fix style * Adding dry run * a few fixes, but not working yet, still a compiler error on dane * fixing weird tabbing issue * Adding dot slash * missing pipe in dry run * lint * format * Update experiment.py * move qws experiment * fix ci file * one dry run * update to inheritance model * fix spec * add license * add workload variant * edit dryrun * qws should work now * Update run.yml * remove experiments dir * black * fix dryrun * fix dryrun --------- Co-authored-by: Alec Scott Co-authored-by: pearce8 --- .github/workflows/run.yml | 12 +++++++ experiments/qws/experiment.py | 61 +++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 experiments/qws/experiment.py diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml index 558be60b..262beebe 100644 --- a/.github/workflows/run.yml +++ b/.github/workflows/run.yml @@ -421,6 +421,18 @@ jobs: --disable-logger \ workspace setup --dry-run + - name: Dry run dynamic qws/openmp with dynamic CTS ruby + run: 
| + system_id=$(./bin/benchpark system id ./ruby-system) + ./bin/benchpark experiment init --dest qws-openmp qws +openmp + ./bin/benchpark setup qws-openmp ./ruby-system workspace/ + . workspace/setup.sh + ramble \ + --workspace-dir workspace/qws-openmp/$system_id/workspace \ + --disable-progress-bar \ + --disable-logger \ + workspace setup --dry-run + - name: Dry run dynamic saxpy/openmp with dynamic fugaku run: | ./bin/benchpark system init --dest=fugaku-system fugaku diff --git a/experiments/qws/experiment.py b/experiments/qws/experiment.py new file mode 100644 index 00000000..6bdb02d9 --- /dev/null +++ b/experiments/qws/experiment.py @@ -0,0 +1,61 @@ +# Copyright 2023 Lawrence Livermore National Security, LLC and other +# Benchpark Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: Apache-2.0 + +from benchpark.directives import variant +from benchpark.experiment import Experiment +from benchpark.openmp import OpenMPExperiment + + +class Qws(Experiment, OpenMPExperiment): + + variant( + "workload", + default="qws", + description="qws", + ) + + variant( + "version", + default="master", + description="app version", + ) + + def compute_applications_section(self): + + self.add_experiment_variable("experiment_setup", "") + self.add_experiment_variable("lx", "32") + self.add_experiment_variable("ly", "6") + self.add_experiment_variable("lz", "4") + self.add_experiment_variable("lt", "3") + self.add_experiment_variable("px", "1") + self.add_experiment_variable("py", "1") + self.add_experiment_variable("pz", "1") + self.add_experiment_variable("pt", "1") + self.add_experiment_variable("tol_outer", "-1") + self.add_experiment_variable("tol_inner", "-1") + self.add_experiment_variable("maxiter_plus1_outer", "6") + self.add_experiment_variable("maxiter_inner", "50") + + if self.spec.satisfies("+openmp"): + self.add_experiment_variable("n_nodes", ["1"], True) + self.add_experiment_variable("processes_per_node", ["1"]) + 
self.add_experiment_variable("n_ranks", "{processes_per_node} * {n_nodes}") + self.add_experiment_variable("omp_num_threads", ["48"]) + self.add_experiment_variable("arch", "OpenMP") + + def compute_spack_section(self): + # get package version + app_version = self.spec.variants["version"][0] + + system_specs = {} + system_specs["compiler"] = "default-compiler" + system_specs["mpi"] = "default-mpi" + + # if package_spec left empty spack will use external + self.add_spack_spec(system_specs["mpi"]) + + self.add_spack_spec( + self.name, [f"qws@{app_version} +mpi", system_specs["compiler"]] + ) From 591ad7b754d950eaa1792b0a5bd1106a4d99bc55 Mon Sep 17 00:00:00 2001 From: Peter Scheibel Date: Tue, 3 Dec 2024 08:08:32 -0800 Subject: [PATCH 2/5] Aws system: integrate 6a/7a configs (#468) * pull in static 7a/6a configs * class name fix; generate software.yaml properly; reference error to resource dir for packages.yaml; define extra mpirun-like opts * add dry run * quote error; consolidate into one line --- .github/workflows/run.yml | 11 ++++ .../compilers/gcc/00-gcc-7-compilers.yaml | 14 +++++ .../externals/base/00-packages.yaml | 28 +++++++++ systems/aws-pcluster/system.py | 63 ++++++++++++++++++- 4 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 systems/aws-pcluster/compilers/gcc/00-gcc-7-compilers.yaml create mode 100644 systems/aws-pcluster/externals/base/00-packages.yaml diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml index 262beebe..6885c420 100644 --- a/.github/workflows/run.yml +++ b/.github/workflows/run.yml @@ -384,6 +384,17 @@ jobs: --disable-logger \ workspace setup --dry-run + - name: Dry run dynamic saxpy/openmp with dynamic aws + run: | + ./bin/benchpark system init --dest=aws1 aws-pcluster instance_type=hpc6a.48xlarge + ./bin/benchpark setup ./saxpy-omp-generic ./aws1 workspace/ + . 
workspace/setup.sh + ramble \ + --workspace-dir "workspace/saxpy-omp-generic/aws1/workspace" \ + --disable-progress-bar \ + --disable-logger \ + workspace setup --dry-run + - name: Dry run dynamic remhos/mpi with dynamic llnl-cluster ruby run: | ./bin/benchpark experiment init --dest=remhos-ruby remhos diff --git a/systems/aws-pcluster/compilers/gcc/00-gcc-7-compilers.yaml b/systems/aws-pcluster/compilers/gcc/00-gcc-7-compilers.yaml new file mode 100644 index 00000000..05b641ee --- /dev/null +++ b/systems/aws-pcluster/compilers/gcc/00-gcc-7-compilers.yaml @@ -0,0 +1,14 @@ +compilers: +- compiler: + spec: gcc@7.3.1 + paths: + cc: /usr/bin/gcc + cxx: /usr/bin/g++ + f77: /usr/bin/gfortran + fc: /usr/bin/gfortran + flags: {} + operating_system: alinux2 + target: x86_64 + modules: [] + environment: {} + extra_rpaths: [] diff --git a/systems/aws-pcluster/externals/base/00-packages.yaml b/systems/aws-pcluster/externals/base/00-packages.yaml new file mode 100644 index 00000000..a3744ac0 --- /dev/null +++ b/systems/aws-pcluster/externals/base/00-packages.yaml @@ -0,0 +1,28 @@ +packages: + tar: + externals: + - spec: tar@1.26 + prefix: /usr + buildable: false + gmake: + externals: + - spec: gmake@3.8.2 + prefix: /usr + blas: + externals: + - spec: blas@3.4.2 + prefix: /usr + buildable: false + lapack: + externals: + - spec: lapack@3.4.2 + prefix: /usr + buildable: false + mpi: + buildable: false + openmpi: + externals: + - spec: openmpi@4.1.5%gcc@7.3.1 + prefix: /opt/amazon/openmpi + extra_attributes: + ldflags: "-L/opt/amazon/openmpi/lib -lmpi" diff --git a/systems/aws-pcluster/system.py b/systems/aws-pcluster/system.py index 7d6a27ff..459072ff 100644 --- a/systems/aws-pcluster/system.py +++ b/systems/aws-pcluster/system.py @@ -3,6 +3,8 @@ # # SPDX-License-Identifier: Apache-2.0 +import pathlib + from benchpark.system import System from benchpark.directives import variant @@ -17,21 +19,76 @@ "sys_cores_per_node": 4, "sys_mem_per_node": 8, }, + "hpc7a.48xlarge": { + 
"sys_cores_per_node": 96, + "sys_mem_per_node": 768, + }, + "hpc6a.48xlarge": { + "sys_cores_per_node": 96, + "sys_mem_per_node": 384, + }, } -class Aws(System): +class AwsPcluster(System): variant( "instance_type", - values=("c6g.xlarge", "c4.xlarge"), + values=("c6g.xlarge", "c4.xlarge", "hpc7a.48xlarge", "hpc6a.48xlarge"), default="c4.xlarge", description="AWS instance type", ) def initialize(self): super().initialize() - self.scheduler = "mpi" + self.scheduler = "slurm" # TODO: for some reason I have to index to get value, even if multi=False attrs = id_to_resources.get(self.spec.variants["instance_type"][0]) for k, v in attrs.items(): setattr(self, k, v) + + def system_specific_variables(self): + return { + "extra_cmd_opts": '--mpi=pmix --export=ALL,FI_EFA_USE_DEVICE_RDMA=1,FI_PROVIDER="efa",OMPI_MCA_mtl_base_verbose=100', + } + + def external_pkg_configs(self): + externals = AwsPcluster.resource_location / "externals" + + selections = [ + externals / "base" / "00-packages.yaml", + ] + + return selections + + def compiler_configs(self): + compilers = AwsPcluster.resource_location / "compilers" + + selections = [ + compilers / "gcc" / "00-gcc-7-compilers.yaml", + ] + + return selections + + def generate_description(self, output_dir): + super().generate_description(output_dir) + + sw_description = pathlib.Path(output_dir) / "software.yaml" + + with open(sw_description, "w") as f: + f.write(self.sw_description()) + + def sw_description(self): + return """\ +software: + packages: + default-compiler: + pkg_spec: gcc@7.3.1 + default-mpi: + pkg_spec: openmpi@4.1.5%gcc@7.3.1 + compiler-gcc: + pkg_spec: gcc@7.3.1 + lapack: + pkg_spec: lapack@3.4.2 + mpi-gcc: + pkg_spec: openmpi@4.1.5%gcc@7.3.1 +""" From 498fc4d49d2dad97444dd49aadf86450f3545fb0 Mon Sep 17 00:00:00 2001 From: Riyaz Haque <5333387+rfhaque@users.noreply.github.com> Date: Thu, 5 Dec 2024 11:54:44 -0800 Subject: [PATCH 3/5] lammps experiment.py (#446) * lammps experiment.py * lint formatiing * lint 
formatiing * Turn off gpu aware mpi for rocm * Enable gtl in lammps * Change lammps legacy ramble configs * Add lammps dry-runs * undo to merge in develop * reapply path changes --------- Co-authored-by: Riyaz Haque Co-authored-by: pearce8 --- .github/workflows/run.yml | 46 +++++++++- experiments/lammps/experiment.py | 90 +++++++++++++++++++ legacy/experiments/lammps/openmp/ramble.yaml | 75 +++++++--------- legacy/experiments/lammps/rocm/ramble.yaml | 79 +++++++--------- .../auxiliary_software_files/compilers.yaml | 4 +- .../software.yaml | 2 +- repo/lammps/package.py | 12 +++ .../compilers/rocm/00-rocm-551-compilers.yaml | 4 +- 8 files changed, 212 insertions(+), 100 deletions(-) create mode 100644 experiments/lammps/experiment.py diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml index 6885c420..69fbd238 100644 --- a/.github/workflows/run.yml +++ b/.github/workflows/run.yml @@ -243,7 +243,17 @@ jobs: --disable-logger \ workspace setup --dry-run - - name: Dry run lammps/rocm on Tioga with allocation modifier + - name: Dry run lammps/openmp with static Ruby + run: | + ./bin/benchpark setup lammps/openmp LLNL-Ruby-icelake-OmniPath workspace/ + . workspace/setup.sh + ramble \ + --workspace-dir workspace/lammps/openmp/LLNL-Ruby-icelake-OmniPath/workspace \ + --disable-progress-bar \ + --disable-logger \ + workspace setup --dry-run + + - name: Dry run lammps/rocm with static Tioga run: | ./bin/benchpark setup lammps/rocm LLNL-Tioga-HPECray-zen3-MI250X-Slingshot workspace/ . workspace/setup.sh @@ -253,6 +263,40 @@ jobs: --disable-logger \ workspace setup --dry-run + - name: Dry run dynamic lammps/openmp on static Ruby + run: | + ./bin/benchpark experiment init --dest=lammps-openmp lammps+openmp + ./bin/benchpark setup ./lammps-openmp LLNL-Ruby-icelake-OmniPath workspace/ + . 
workspace/setup.sh + ramble \ + --workspace-dir workspace/lammps-openmp/LLNL-Ruby-icelake-OmniPath/workspace \ + --disable-progress-bar \ + --disable-logger \ + workspace setup --dry-run + + - name: Dry run dynamic lammps/rocm on static Tioga + run: | + ./bin/benchpark experiment init --dest=lammps-rocm lammps+rocm + ./bin/benchpark setup ./lammps-rocm LLNL-Tioga-HPECray-zen3-MI250X-Slingshot workspace/ + . workspace/setup.sh + ramble \ + --workspace-dir workspace/lammps-rocm/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/workspace \ + --disable-progress-bar \ + --disable-logger \ + workspace setup --dry-run + + - name: Dry run dynamic lammps/rocm with dynamic Tioga + run: | + ./bin/benchpark experiment init --dest=lammps-rocm-tioga lammps+rocm + ./bin/benchpark setup lammps-rocm-tioga ./tioga-system workspace/ + system_id=$(./bin/benchpark system id ./tioga-system) + . workspace/setup.sh + ramble \ + --workspace-dir workspace/lammps-rocm-tioga/$system_id/workspace \ + --disable-progress-bar \ + --disable-logger \ + workspace setup --dry-run + - name: Dry run hpl/openmp with Caliper-topdown modifier on Magma run: | ./bin/benchpark setup hpl/openmp --modifier=caliper-topdown LLNL-Magma-Penguin-icelake-OmniPath workspace/ diff --git a/experiments/lammps/experiment.py b/experiments/lammps/experiment.py new file mode 100644 index 00000000..602becbc --- /dev/null +++ b/experiments/lammps/experiment.py @@ -0,0 +1,90 @@ +# Copyright 2023 Lawrence Livermore National Security, LLC and other +# Benchpark Project Developers. See the top-level COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: Apache-2.0 + +from benchpark.directives import variant +from benchpark.experiment import Experiment +from benchpark.openmp import OpenMPExperiment +from benchpark.rocm import ROCmExperiment + + +class Lammps( + Experiment, + OpenMPExperiment, + ROCmExperiment, +): + variant( + "workload", + default="hns-reaxff", + values=("hns-reaxff", "lj", "eam", "chain", "chute", "rhodo"), + description="workloads", + ) + + variant( + "version", + default="20231121", + description="app version", + ) + + def compute_applications_section(self): + if self.spec.satisfies("+openmp"): + problem_sizes = {"x": 8, "y": 8, "z": 8} + kokkos_mode = "t {n_threads_per_proc}" + kokkos_gpu_aware = "off" + kokkos_comm = "host" + elif self.spec.satisfies("+rocm"): + problem_sizes = {"x": 20, "y": 40, "z": 32} + kokkos_mode = "g 1" + kokkos_gpu_aware = "on" + kokkos_comm = "device" + + for nk, nv in problem_sizes.items(): + self.add_experiment_variable(nk, nv, True) + + input_sizes = " ".join(f"-v {k} {{{k}}}" for k in problem_sizes.keys()) + + if self.spec.satisfies("+openmp"): + self.add_experiment_variable("n_nodes", 1, True) + self.add_experiment_variable("n_ranks_per_node", 36, True) + self.add_experiment_variable("n_threads_per_proc", 1, True) + elif self.spec.satisfies("+rocm"): + self.add_experiment_variable("n_nodes", 8, True) + self.add_experiment_variable("n_ranks_per_node", 8, True) + self.add_experiment_variable("n_gpus", 64, True) + + self.add_experiment_variable("timesteps", 100, False) + self.add_experiment_variable("input_file", "{input_path}/in.reaxc.hns", False) + self.add_experiment_variable( + "lammps_flags", + f"{input_sizes} -k on {kokkos_mode} -sf kk -pk kokkos gpu/aware {kokkos_gpu_aware} neigh half comm {kokkos_comm} neigh/qeq full newton on -nocite", + False, + ) + + def compute_spack_section(self): + # get package version + app_version = self.spec.variants["version"][0] + + # get system config options + # TODO: Get compiler/mpi/package 
handles directly from system.py + system_specs = {} + system_specs["compiler"] = "default-compiler" + system_specs["mpi"] = "default-mpi" + if self.spec.satisfies("+rocm"): + system_specs["rocm_arch"] = "{rocm_arch}" + system_specs["blas"] = "blas-rocm" + + # set package spack specs + if self.spec.satisfies("+rocm"): + # empty package_specs value implies external package + self.add_spack_spec(system_specs["blas"]) + # empty package_specs value implies external package + self.add_spack_spec(system_specs["mpi"]) + + self.add_spack_spec( + self.name, + [ + f"lammps@{app_version} +opt+manybody+molecule+kspace+rigid+kokkos+asphere+dpd-basic+dpd-meso+dpd-react+dpd-smooth+reaxff lammps_sizes=bigbig ", + system_specs["compiler"], + ], + ) diff --git a/legacy/experiments/lammps/openmp/ramble.yaml b/legacy/experiments/lammps/openmp/ramble.yaml index 6ce9626c..9269cd58 100644 --- a/legacy/experiments/lammps/openmp/ramble.yaml +++ b/legacy/experiments/lammps/openmp/ramble.yaml @@ -1,59 +1,44 @@ -# Copyright 2023 Lawrence Livermore National Security, LLC and other -# Benchpark Project Developers. See the top-level COPYRIGHT file for details. 
-# -# SPDX-License-Identifier: Apache-2.0 - ramble: - include: - - ./configs/software.yaml - - ./configs/variables.yaml - - config: - deprecated: true - spack_flags: - install: '--add --keep-stage' - concretize: '-U -f' - - modifiers: - - name: allocation - applications: lammps: workloads: hns-reaxff: - variables: - size_name: ['medium'] - size_x: [2] - size_y: [2] - size_z: [2] - scaling_nodes: [1] - n_nodes: '{scaling_nodes}' - n_threads_per_proc: '1' - lammps_flags: '-v x {size_x} -v y {size_y} -v z {size_z}' experiments: - scaling_{n_nodes}nodes_{size_name}: + lammps_hns-reaxff_single_node_openmp_{x}_{y}_{z}_{n_nodes}_{n_ranks_per_node}_{n_threads_per_proc}: + exclude: {} + matrix: [] + variables: + input_file: '{input_path}/in.reaxc.hns' + lammps_flags: -v x {x} -v y {y} -v z {z} -k on t {n_threads_per_proc} + -sf kk -pk kokkos gpu/aware off neigh half comm host neigh/qeq full + newton on -nocite + n_nodes: 1 + n_ranks_per_node: 36 + n_threads_per_proc: 1 + timesteps: 100 + x: 8 + y: 8 + z: 8 variants: package_manager: spack - variables: - env_name: lammps - n_ranks_per_node: ['36'] - zips: - problems: - - size_name - - size_x - - size_y - - size_z - matrix: - - problems - - scaling_nodes + zips: {} + config: + deprecated: true + spack_flags: + concretize: -U -f + install: --add --keep-stage + include: + - ./configs + modifiers: + - name: allocation software: - packages: - lammps: - pkg_spec: lammps@20231121 +opt+manybody+molecule+kspace+rigid+openmp+openmp-package+asphere+dpd-basic+dpd-meso+dpd-react+dpd-smooth+reaxff - compiler: default-compiler environments: lammps: packages: - - lapack - default-mpi - lammps + packages: + lammps: + compiler: default-compiler + pkg_spec: lammps@20231121 +opt+manybody+molecule+kspace+rigid+kokkos+asphere+dpd-basic+dpd-meso+dpd-react+dpd-smooth+reaxff + lammps_sizes=bigbig +openmp ~rocm diff --git a/legacy/experiments/lammps/rocm/ramble.yaml b/legacy/experiments/lammps/rocm/ramble.yaml index 713bf81e..5d058698 100644 --- 
a/legacy/experiments/lammps/rocm/ramble.yaml +++ b/legacy/experiments/lammps/rocm/ramble.yaml @@ -1,63 +1,44 @@ -# Copyright 2023 Lawrence Livermore National Security, LLC and other -# Benchpark Project Developers. See the top-level COPYRIGHT file for details. -# -# SPDX-License-Identifier: Apache-2.0 - ramble: - include: - - ./configs/software.yaml - - ./configs/variables.yaml - - ./configs/modifier.yaml - - config: - deprecated: true - spack_flags: - install: '--add --keep-stage' - concretize: '-U -f' - - modifiers: - - name: allocation - applications: lammps: workloads: hns-reaxff: - variables: - scaling_nodes: ['8'] - n_ranks_per_node: ['8'] - n_nodes: '{scaling_nodes}' - n_gpus: 64 experiments: - scaling_{n_nodes}nodes_{size_name}: - variants: - package_manager: spack + lammps_hns-reaxff_single_node_rocm_{x}_{y}_{z}_{n_nodes}_{n_ranks_per_node}_{n_gpus}: + exclude: {} + matrix: [] variables: - size_name: ['bigbig'] - size_x: [20] - size_y: [40] - size_z: [32] - timesteps: 100 input_file: '{input_path}/in.reaxc.hns' - lammps_flags: '-k on g 1 -sf kk -pk kokkos gpu/aware off neigh half comm device neigh/qeq full newton on -v x {size_x} -v y {size_y} -v z {size_z} -nocite' - zips: - problems: - - size_name - - size_x - - size_y - - size_z - matrix: - - problems - - scaling_nodes - + lammps_flags: -v x {x} -v y {y} -v z {z} -k on g 1 -sf kk -pk kokkos + gpu/aware on neigh half comm device neigh/qeq full newton on -nocite + n_gpus: 64 + n_nodes: 8 + n_ranks_per_node: 8 + timesteps: 100 + x: 20 + y: 40 + z: 32 + variants: + package_manager: spack + zips: {} + config: + deprecated: true + spack_flags: + concretize: -U -f + install: --add --keep-stage + include: + - ./configs + modifiers: + - name: allocation software: - packages: - lammps: - pkg_spec: lammps@20231121 +opt+manybody+molecule+kspace+rigid+kokkos+rocm+asphere+dpd-basic+dpd-meso+dpd-react+dpd-smooth+reaxff~openmp lammps_sizes=bigbig amdgpu_target={rocm_arch} - compiler: compiler-rocm environments: lammps: 
packages: - blas-rocm - - lapack - - mpi-rocm-no-gtl + - default-mpi - lammps + packages: + lammps: + compiler: default-compiler + pkg_spec: lammps@20231121 +opt+manybody+molecule+kspace+rigid+kokkos+asphere+dpd-basic+dpd-meso+dpd-react+dpd-smooth+reaxff + lammps_sizes=bigbig ~openmp +rocm amdgpu_target={rocm_arch} diff --git a/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/auxiliary_software_files/compilers.yaml b/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/auxiliary_software_files/compilers.yaml index 0d028757..ba9be75d 100644 --- a/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/auxiliary_software_files/compilers.yaml +++ b/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/auxiliary_software_files/compilers.yaml @@ -21,8 +21,8 @@ compilers: modules: [] environment: prepend_path: - LD_LIBRARY_PATH: /opt/cray/pe/cce/16.0.0/cce/x86_64/lib - extra_rpaths: [/opt/cray/pe/cce/16.0.0/cce/x86_64/lib/, /opt/cray/pe/gcc-libs/] + LD_LIBRARY_PATH: "/opt/cray/pe/cce/16.0.0/cce/x86_64/lib:/opt/rocm-5.5.1/lib" + extra_rpaths: [/opt/cray/pe/cce/16.0.0/cce/x86_64/lib/, /opt/cray/pe/gcc-libs/, /opt/rocm-5.5.1/lib] - compiler: spec: rocmcc@5.5.1 paths: diff --git a/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/software.yaml b/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/software.yaml index e729bc0a..7b76fd61 100644 --- a/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/software.yaml +++ b/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/software.yaml @@ -8,7 +8,7 @@ software: default-compiler: pkg_spec: cce@16.0.0-rocm5.5.1 default-mpi: - pkg_spec: cray-mpich@8.1.26%cce@16.0.0 ~gtl + pkg_spec: cray-mpich@8.1.26%cce@16.0.0 +gtl compiler-rocm: pkg_spec: cce@16.0.0-rocm5.5.1 compiler-amdclang: diff --git a/repo/lammps/package.py b/repo/lammps/package.py index e2403af8..230dbbaf 100644 --- a/repo/lammps/package.py +++ b/repo/lammps/package.py @@ -9,6 +9,10 @@ class Lammps(BuiltinLammps): + depends_on("kokkos+openmp", 
when="+openmp") + depends_on("kokkos+rocm", when="+rocm") + depends_on("kokkos+cuda", when="+cuda") + def setup_run_environment(self, env): super(BuiltinLammps, self).setup_run_environment(env) @@ -16,3 +20,11 @@ def setup_run_environment(self, env): if self.compiler.extra_rpaths: for rpath in self.compiler.extra_rpaths: env.prepend_path("LD_LIBRARY_PATH", rpath) + + def setup_build_environment(self, env): + super().setup_build_environment(env) + + spec = self.spec + if "+mpi" in spec: + if spec["mpi"].extra_attributes and "ldflags" in spec["mpi"].extra_attributes: + env.append_flags("LDFLAGS", spec["mpi"].extra_attributes["ldflags"]) diff --git a/systems/llnl-elcapitan/compilers/rocm/00-rocm-551-compilers.yaml b/systems/llnl-elcapitan/compilers/rocm/00-rocm-551-compilers.yaml index 9185da69..6fe5e6bf 100644 --- a/systems/llnl-elcapitan/compilers/rocm/00-rocm-551-compilers.yaml +++ b/systems/llnl-elcapitan/compilers/rocm/00-rocm-551-compilers.yaml @@ -21,8 +21,8 @@ compilers: modules: [] environment: prepend_path: - LD_LIBRARY_PATH: /opt/cray/pe/cce/16.0.0/cce/x86_64/lib - extra_rpaths: [/opt/cray/pe/cce/16.0.0/cce/x86_64/lib/, /opt/cray/pe/gcc-libs/] + LD_LIBRARY_PATH: "/opt/cray/pe/cce/16.0.0/cce/x86_64/lib:/opt/rocm-5.5.1/lib" + extra_rpaths: [/opt/cray/pe/cce/16.0.0/cce/x86_64/lib/, /opt/cray/pe/gcc-libs/, /opt/rocm-5.5.1/lib] - compiler: spec: rocmcc@5.5.1 paths: From 84a857c932828f0262de25a39f5d85fbebe76fde Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 13:51:18 -0600 Subject: [PATCH 4/5] Bump sphinxcontrib-programoutput from 0.17 to 0.18 in /.github/workflows/requirements (#479) Bump sphinxcontrib-programoutput in /.github/workflows/requirements Bumps [sphinxcontrib-programoutput](https://github.com/NextThought/sphinxcontrib-programoutput) from 0.17 to 0.18. 
- [Changelog](https://github.com/OpenNTI/sphinxcontrib-programoutput/blob/master/CHANGES.rst) - [Commits](https://github.com/NextThought/sphinxcontrib-programoutput/compare/0.17...0.18) --- updated-dependencies: - dependency-name: sphinxcontrib-programoutput dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/requirements/docs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/requirements/docs.txt b/.github/workflows/requirements/docs.txt index 38619858..07a1ee79 100644 --- a/.github/workflows/requirements/docs.txt +++ b/.github/workflows/requirements/docs.txt @@ -4,7 +4,7 @@ sphinx-rtd-theme==3.0.2 codespell==2.3.0 pandas==2.2.3 pyyaml==6.0.2 -sphinxcontrib-programoutput==0.17 +sphinxcontrib-programoutput==0.18 # The remaining requirements are from Ramble pytest flake8 From e54c9030e355d9ac2fe49093ae3e480fe7f211f8 Mon Sep 17 00:00:00 2001 From: Peter Scheibel Date: Mon, 9 Dec 2024 13:15:09 -0800 Subject: [PATCH 5/5] El Capitan System: make it easier to add rocm/mpi versions (#475) * make it easier to add new rocm versions * can remove static rocm configs (now generated from template) * cce/rocm compiler configs now generated dynamically * fix reference errors * dynamic mpi generation * proper gtl spec * was not returning correct config for mpich with gcc * demonstrate adding new rocm version * style edits * update dry runs now that 551 -> 5.5.1 --- .github/workflows/run.yml | 4 +- lib/benchpark/system.py | 12 + .../compilers/rocm/00-rocm-551-compilers.yaml | 50 ----- .../compilers/rocm/01-rocm-543-compilers.yaml | 26 --- .../externals/mpi/00-gcc-ngtl-packages.yaml | 8 - .../externals/mpi/01-cce-ngtl-packages.yaml | 8 - .../externals/mpi/02-cce-ygtl-packages.yaml | 10 - .../rocm/00-version-543-packages.yaml | 73 ------ .../rocm/01-version-551-packages.yaml | 91 
-------- systems/llnl-elcapitan/system.py | 212 ++++++++++++++++-- systems/llnl-sierra/system.py | 13 -- 11 files changed, 208 insertions(+), 299 deletions(-) delete mode 100644 systems/llnl-elcapitan/compilers/rocm/00-rocm-551-compilers.yaml delete mode 100644 systems/llnl-elcapitan/compilers/rocm/01-rocm-543-compilers.yaml delete mode 100644 systems/llnl-elcapitan/externals/mpi/00-gcc-ngtl-packages.yaml delete mode 100644 systems/llnl-elcapitan/externals/mpi/01-cce-ngtl-packages.yaml delete mode 100644 systems/llnl-elcapitan/externals/mpi/02-cce-ygtl-packages.yaml delete mode 100644 systems/llnl-elcapitan/externals/rocm/00-version-543-packages.yaml delete mode 100644 systems/llnl-elcapitan/externals/rocm/01-version-551-packages.yaml diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml index 69fbd238..eea1d9cd 100644 --- a/.github/workflows/run.yml +++ b/.github/workflows/run.yml @@ -175,7 +175,7 @@ jobs: - name: Dry run kripke/rocm on dynamic Tioga with allocation modifier run: | - ./bin/benchpark system init --dest=tioga-system llnl-elcapitan rocm=551 compiler=cce ~gtl + ./bin/benchpark system init --dest=tioga-system llnl-elcapitan rocm=5.5.1 compiler=cce ~gtl ./bin/benchpark setup kripke/rocm ./tioga-system workspace/ system_id=$(./bin/benchpark system id ./tioga-system) . 
workspace/setup.sh @@ -198,7 +198,7 @@ jobs: - name: Dry run dynamic saxpy/rocm with dynamic Tioga run: | - ./bin/benchpark system init --dest=tioga-system2 llnl-elcapitan rocm=551 compiler=cce ~gtl + ./bin/benchpark system init --dest=tioga-system2 llnl-elcapitan rocm=5.5.1 compiler=cce ~gtl system_id=$(./bin/benchpark system id ./tioga-system2) ./bin/benchpark experiment init --dest=saxpy-rocm2 saxpy+rocm ./bin/benchpark setup ./saxpy-rocm2 ./tioga-system2 workspace/ diff --git a/lib/benchpark/system.py b/lib/benchpark/system.py index ee527788..55079072 100644 --- a/lib/benchpark/system.py +++ b/lib/benchpark/system.py @@ -8,6 +8,7 @@ import os import pathlib import sys +import tempfile import yaml import benchpark.paths @@ -190,6 +191,17 @@ def variables_yaml(self): mpi_command: "placeholder" """ + def _adhoc_cfgs(self): + if not getattr(self, "_tmp_cfgs", None): + self._tmp_cfgs = tempfile.mkdtemp() + self._adhoc_cfg_idx = 0 + return self._tmp_cfgs + + def next_adhoc_cfg(self): + basedir = self._adhoc_cfgs() + self._adhoc_cfg_idx += 1 + return os.path.join(basedir, str(self._adhoc_cfg_idx)) + def unique_dir_for_description(system_dir): system_id_path = os.path.join(system_dir, "system_id.yaml") diff --git a/systems/llnl-elcapitan/compilers/rocm/00-rocm-551-compilers.yaml b/systems/llnl-elcapitan/compilers/rocm/00-rocm-551-compilers.yaml deleted file mode 100644 index 6fe5e6bf..00000000 --- a/systems/llnl-elcapitan/compilers/rocm/00-rocm-551-compilers.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2023 Lawrence Livermore National Security, LLC and other -# Benchpark Project Developers. See the top-level COPYRIGHT file for details. 
-# -# SPDX-License-Identifier: Apache-2.0 - -compilers: -- compiler: - spec: cce@16.0.0-rocm5.5.1 - paths: - cc: /opt/cray/pe/cce/16.0.0/bin/craycc - cxx: /opt/cray/pe/cce/16.0.0/bin/crayCC - f77: /opt/cray/pe/cce/16.0.0/bin/crayftn - fc: /opt/cray/pe/cce/16.0.0/bin/crayftn - flags: - cflags: -g -O2 - cxxflags: -g -O2 -std=c++17 - fflags: -g -O2 -hnopattern - ldflags: -ldl - operating_system: rhel8 - target: x86_64 - modules: [] - environment: - prepend_path: - LD_LIBRARY_PATH: "/opt/cray/pe/cce/16.0.0/cce/x86_64/lib:/opt/rocm-5.5.1/lib" - extra_rpaths: [/opt/cray/pe/cce/16.0.0/cce/x86_64/lib/, /opt/cray/pe/gcc-libs/, /opt/rocm-5.5.1/lib] -- compiler: - spec: rocmcc@5.5.1 - paths: - cc: /opt/rocm-5.5.1/bin/amdclang - cxx: /opt/rocm-5.5.1/bin/amdclang++ - f77: /opt/rocm-5.5.1/bin/amdflang - fc: /opt/rocm-5.5.1/bin/amdflang - flags: - cflags: -g -O2 - cxxflags: -g -O2 - operating_system: rhel8 - target: x86_64 - modules: [] - environment: - set: - RFE_811452_DISABLE: '1' - append_path: - LD_LIBRARY_PATH: /opt/cray/pe/gcc-libs - prepend_path: - LD_LIBRARY_PATH: "/opt/cray/pe/cce/16.0.0/cce/x86_64/lib:/opt/cray/pe/pmi/6.1.12/lib" - LIBRARY_PATH: /opt/rocm-5.5.1/lib - extra_rpaths: - - /opt/rocm-5.5.1/lib - - /opt/cray/pe/gcc-libs - - /opt/cray/pe/cce/16.0.0/cce/x86_64/lib diff --git a/systems/llnl-elcapitan/compilers/rocm/01-rocm-543-compilers.yaml b/systems/llnl-elcapitan/compilers/rocm/01-rocm-543-compilers.yaml deleted file mode 100644 index a032c69a..00000000 --- a/systems/llnl-elcapitan/compilers/rocm/01-rocm-543-compilers.yaml +++ /dev/null @@ -1,26 +0,0 @@ -compilers: -- compiler: - spec: rocmcc@5.4.3 - paths: - cc: /opt/rocm-5.4.3/bin/amdclang - cxx: /opt/rocm-5.4.3/bin/amdclang++ - f77: /opt/rocm-5.4.3/bin/amdflang - fc: /opt/rocm-5.4.3/bin/amdflang - flags: - cflags: -g -O2 - cxxflags: -g -O2 - operating_system: rhel8 - target: x86_64 - modules: [] - environment: - set: - RFE_811452_DISABLE: '1' - append_path: - LD_LIBRARY_PATH: /opt/cray/pe/gcc-libs - 
prepend_path: - LD_LIBRARY_PATH: "/opt/cray/pe/cce/16.0.0/cce/x86_64/lib:/opt/cray/pe/pmi/6.1.12/lib" - LIBRARY_PATH: /opt/rocm-5.4.3/lib - extra_rpaths: - - /opt/rocm-5.4.3/lib - - /opt/cray/pe/gcc-libs - - /opt/cray/pe/cce/16.0.0/cce/x86_64/lib diff --git a/systems/llnl-elcapitan/externals/mpi/00-gcc-ngtl-packages.yaml b/systems/llnl-elcapitan/externals/mpi/00-gcc-ngtl-packages.yaml deleted file mode 100644 index 4b2314e6..00000000 --- a/systems/llnl-elcapitan/externals/mpi/00-gcc-ngtl-packages.yaml +++ /dev/null @@ -1,8 +0,0 @@ -packages: - cray-mpich: - externals: - - spec: cray-mpich@8.1.26%gcc@12.2.0 ~gtl +wrappers - prefix: /opt/cray/pe/mpich/8.1.26/ofi/gnu/10.3 - extra_attributes: - gtl_lib_path: /opt/cray/pe/mpich/8.1.26/gtl/lib - ldflags: "-L/opt/cray/pe/mpich/8.1.26/ofi/gnu/10.3/lib -lmpi -L/opt/cray/pe/mpich/8.1.26/gtl/lib -Wl,-rpath=/opt/cray/pe/mpich/8.1.26/gtl/lib" diff --git a/systems/llnl-elcapitan/externals/mpi/01-cce-ngtl-packages.yaml b/systems/llnl-elcapitan/externals/mpi/01-cce-ngtl-packages.yaml deleted file mode 100644 index dc7b7b3c..00000000 --- a/systems/llnl-elcapitan/externals/mpi/01-cce-ngtl-packages.yaml +++ /dev/null @@ -1,8 +0,0 @@ -packages: - cray-mpich: - externals: - - spec: cray-mpich@8.1.26%cce@16.0.0 ~gtl +wrappers - prefix: /opt/cray/pe/mpich/8.1.26/ofi/crayclang/16.0 - extra_attributes: - gtl_lib_path: /opt/cray/pe/mpich/8.1.26/gtl/lib - ldflags: "-L/opt/cray/pe/mpich/8.1.26/ofi/crayclang/16.0/lib -lmpi -L/opt/cray/pe/mpich/8.1.26/gtl/lib -Wl,-rpath=/opt/cray/pe/mpich/8.1.26/gtl/lib" diff --git a/systems/llnl-elcapitan/externals/mpi/02-cce-ygtl-packages.yaml b/systems/llnl-elcapitan/externals/mpi/02-cce-ygtl-packages.yaml deleted file mode 100644 index 1289b84e..00000000 --- a/systems/llnl-elcapitan/externals/mpi/02-cce-ygtl-packages.yaml +++ /dev/null @@ -1,10 +0,0 @@ -packages: - cray-mpich: - externals: - - spec: cray-mpich@8.1.26%cce@16.0.0 +gtl +wrappers - prefix: /opt/cray/pe/mpich/8.1.26/ofi/crayclang/16.0 - 
extra_attributes: - gtl_cutoff_size: 4096 - fi_cxi_ats: 0 - gtl_lib_path: /opt/cray/pe/mpich/8.1.26/gtl/lib - ldflags: "-L/opt/cray/pe/mpich/8.1.26/ofi/crayclang/16.0/lib -lmpi -L/opt/cray/pe/mpich/8.1.26/gtl/lib -Wl,-rpath=/opt/cray/pe/mpich/8.1.26/gtl/lib -lmpi_gtl_hsa" diff --git a/systems/llnl-elcapitan/externals/rocm/00-version-543-packages.yaml b/systems/llnl-elcapitan/externals/rocm/00-version-543-packages.yaml deleted file mode 100644 index e4dea4c9..00000000 --- a/systems/llnl-elcapitan/externals/rocm/00-version-543-packages.yaml +++ /dev/null @@ -1,73 +0,0 @@ -packages: - hipfft: - externals: - - spec: hipfft@5.4.3 - prefix: /opt/rocm-5.4.3 - rocfft: - externals: - - spec: rocfft@5.4.3 - prefix: /opt/rocm-5.4.3 - rocprim: - externals: - - spec: rocprim@5.4.3 - prefix: /opt/rocm-5.4.3 - rocrand: - externals: - - spec: rocrand@5.4.3 - prefix: /opt/rocm-5.4.3/hiprand - rocsparse: - externals: - - spec: rocsparse@5.4.3 - prefix: /opt/rocm-5.4.3 - rocthrust: - externals: - - spec: rocthrust@5.4.3 - prefix: /opt/rocm-5.4.3 - hip: - externals: - - spec: hip@5.4.3 - prefix: /opt/rocm-5.4.3 - hsa-rocr-dev: - externals: - - spec: hsa-rocr-dev@5.4.3 - prefix: /opt/rocm-5.4.3 - comgr: - externals: - - spec: comgr@5.4.3 - prefix: /opt/rocm-5.4.3/ - hipsparse: - externals: - - spec: hipsparse@5.4.3 - prefix: /opt/rocm-5.4.3 - hipblas: - externals: - - spec: hipblas@5.4.3 - prefix: /opt/rocm-5.4.3/ - hsakmt-roct: - externals: - - spec: hsakmt-roct@5.4.3 - prefix: /opt/rocm-5.4.3/ - roctracer-dev-api: - externals: - - spec: roctracer-dev-api@5.4.3 - prefix: /opt/rocm-5.4.3/ - rocminfo: - externals: - - spec: rocminfo@5.4.3 - prefix: /opt/rocm-5.4.3/ - llvm: - externals: - - spec: llvm@15.0.0-5.4.3 - prefix: /opt/rocm-5.4.3/llvm - llvm-amdgpu: - externals: - - spec: llvm-amdgpu@5.4.3 - prefix: /opt/rocm-5.4.3/llvm - rocblas: - externals: - - spec: rocblas@5.4.3 - prefix: /opt/rocm-5.4.3 - rocsolver: - externals: - - spec: rocsolver@5.4.3 - prefix: /opt/rocm-5.4.3 diff 
--git a/systems/llnl-elcapitan/externals/rocm/01-version-551-packages.yaml b/systems/llnl-elcapitan/externals/rocm/01-version-551-packages.yaml deleted file mode 100644 index 2d871d34..00000000 --- a/systems/llnl-elcapitan/externals/rocm/01-version-551-packages.yaml +++ /dev/null @@ -1,91 +0,0 @@ -packages: - hipfft: - buildable: false - externals: - - spec: hipfft@5.5.1 - prefix: /opt/rocm-5.5.1 - rocfft: - buildable: false - externals: - - spec: rocfft@5.5.1 - prefix: /opt/rocm-5.5.1 - rocprim: - buildable: false - externals: - - spec: rocprim@5.5.1 - prefix: /opt/rocm-5.5.1 - rocrand: - buildable: false - externals: - - spec: rocrand@5.5.1 - prefix: /opt/rocm-5.5.1/hiprand - rocsparse: - buildable: false - externals: - - spec: rocsparse@5.5.1 - prefix: /opt/rocm-5.5.1 - rocthrust: - buildable: false - externals: - - spec: rocthrust@5.5.1 - prefix: /opt/rocm-5.5.1 - hip: - buildable: false - externals: - - spec: hip@5.5.1 - prefix: /opt/rocm-5.5.1 - hsa-rocr-dev: - buildable: false - externals: - - spec: hsa-rocr-dev@5.5.1 - prefix: /opt/rocm-5.5.1 - comgr: - buildable: false - externals: - - spec: comgr@5.5.1 - prefix: /opt/rocm-5.5.1/ - hipsparse: - buildable: false - externals: - - spec: hipsparse@5.5.1 - prefix: /opt/rocm-5.5.1 - hipblas: - buildable: false - externals: - - spec: hipblas@5.5.1 - prefix: /opt/rocm-5.5.1/ - hsakmt-roct: - buildable: false - externals: - - spec: hsakmt-roct@5.5.1 - prefix: /opt/rocm-5.5.1/ - roctracer-dev-api: - buildable: false - externals: - - spec: roctracer-dev-api@5.5.1 - prefix: /opt/rocm-5.5.1/ - rocminfo: - buildable: false - externals: - - spec: rocminfo@5.5.1 - prefix: /opt/rocm-5.5.1/ - llvm: - buildable: false - externals: - - spec: llvm@16.0.0-5.5.1 - prefix: /opt/rocm-5.5.1/llvm - llvm-amdgpu: - buildable: false - externals: - - spec: llvm-amdgpu@5.5.1 - prefix: /opt/rocm-5.5.1/llvm - rocblas: - buildable: false - externals: - - spec: rocblas@5.5.1 - prefix: /opt/rocm-5.5.1 - rocsolver: - buildable: false - 
externals: - - spec: rocsolver@5.5.1 - prefix: /opt/rocm-5.5.1 diff --git a/systems/llnl-elcapitan/system.py b/systems/llnl-elcapitan/system.py index 37615ae8..19909c89 100644 --- a/systems/llnl-elcapitan/system.py +++ b/systems/llnl-elcapitan/system.py @@ -33,8 +33,8 @@ class LlnlElcapitan(System): variant( "rocm", - default="551", - values=("543", "551"), + default="5.5.1", + values=("5.4.3", "5.5.1", "6.2.4"), description="ROCm version", ) @@ -72,23 +72,24 @@ def external_pkg_configs(self): externals = LlnlElcapitan.resource_location / "externals" rocm = self.spec.variants["rocm"][0] - gtl = self.spec.variants["gtl"][0] + # gtl = self.spec.variants["gtl"][0] compiler = self.spec.variants["compiler"][0] selections = [externals / "base" / "00-packages.yaml"] - if rocm == "543": - selections.append(externals / "rocm" / "00-version-543-packages.yaml") - elif rocm == "551": - selections.append(externals / "rocm" / "01-version-551-packages.yaml") + + rocm_cfg_path = self.next_adhoc_cfg() + with open(rocm_cfg_path, "w") as f: + f.write(self.rocm_config(rocm)) + selections.append(rocm_cfg_path) + + mpi_cfg_path = self.next_adhoc_cfg() + with open(mpi_cfg_path, "w") as f: + f.write(self.mpi_config("16.0.0")) + selections.append(mpi_cfg_path) if compiler == "cce": - if gtl == "true": - selections.append(externals / "mpi" / "02-cce-ygtl-packages.yaml") - else: - selections.append(externals / "mpi" / "01-cce-ngtl-packages.yaml") selections.append(externals / "libsci" / "01-cce-packages.yaml") elif compiler == "gcc": - selections.append(externals / "mpi" / "00-gcc-ngtl-packages.yaml") selections.append(externals / "libsci" / "00-gcc-packages.yaml") return selections @@ -97,15 +98,14 @@ def compiler_configs(self): compilers = LlnlElcapitan.resource_location / "compilers" compiler = self.spec.variants["compiler"][0] - # rocm = self.spec.variants["rocm"][0] + rocm = self.spec.variants["rocm"][0] selections = [] - # TODO: I'm not actually sure what compiler mixing is desired, if 
def mpi_config(self, cce_version):
    """Return Spack ``packages.yaml`` text describing the cray-mpich external.

    Two system variants are read from ``self.spec.variants``:

    * ``compiler`` -- ``"cce"`` or ``"gcc"`` selects which external is emitted.
    * ``gtl`` -- for ``cce`` only, selects the ``+gtl`` entry (extra GTL
      attributes and ``-lmpi_gtl_hsa``) or the ``~gtl`` entry. The ``gcc``
      entry is always ``~gtl``.

    Args:
        cce_version: Full CCE version string such as ``"16.0.0"``. The spec
            uses it verbatim; the install prefix uses only ``major.minor``.

    Returns:
        The YAML document as a string, ready to be written to a config file.

    Raises:
        ValueError: If the ``compiler`` variant is neither ``cce`` nor ``gcc``.
    """
    gtl = self.spec.variants["gtl"][0]
    compiler = self.spec.variants["compiler"][0]

    # Compare textually rather than by truthiness: a variant value of the
    # string "false" is non-empty and would otherwise enable GTL.
    gtl_enabled = str(gtl).lower() == "true"

    short_cce_version = ".".join(cce_version.split(".")[:2])
    mpi_version = "8.1.26"

    if compiler == "cce":
        dont_use_gtl = f"""\
        gtl_lib_path: /opt/cray/pe/mpich/{mpi_version}/gtl/lib
        ldflags: "-L/opt/cray/pe/mpich/{mpi_version}/ofi/crayclang/{short_cce_version}/lib -lmpi -L/opt/cray/pe/mpich/{mpi_version}/gtl/lib -Wl,-rpath=/opt/cray/pe/mpich/{mpi_version}/gtl/lib"
"""

        use_gtl = f"""\
        gtl_cutoff_size: 4096
        fi_cxi_ats: 0
        gtl_lib_path: /opt/cray/pe/mpich/{mpi_version}/gtl/lib
        ldflags: "-L/opt/cray/pe/mpich/{mpi_version}/ofi/crayclang/{short_cce_version}/lib -lmpi -L/opt/cray/pe/mpich/{mpi_version}/gtl/lib -Wl,-rpath=/opt/cray/pe/mpich/{mpi_version}/gtl/lib -lmpi_gtl_hsa"
"""

        if gtl_enabled:
            gtl_spec = "+gtl"
            gtl_cfg = use_gtl
        else:
            gtl_spec = "~gtl"
            gtl_cfg = dont_use_gtl

        return f"""\
packages:
  cray-mpich:
    externals:
    - spec: cray-mpich@{mpi_version}%cce@{cce_version} {gtl_spec} +wrappers
      prefix: /opt/cray/pe/mpich/{mpi_version}/ofi/crayclang/{short_cce_version}
      extra_attributes:
{gtl_cfg}
"""

    if compiler == "gcc":
        # Only a ~gtl external is described for gcc; the gtl variant is ignored.
        return f"""\
packages:
  cray-mpich:
    externals:
    - spec: cray-mpich@{mpi_version}%gcc@12.2.0 ~gtl +wrappers
      prefix: /opt/cray/pe/mpich/{mpi_version}/ofi/gnu/10.3
      extra_attributes:
        gtl_lib_path: /opt/cray/pe/mpich/{mpi_version}/gtl/lib
        ldflags: "-L/opt/cray/pe/mpich/{mpi_version}/ofi/gnu/10.3/lib -lmpi -L/opt/cray/pe/mpich/{mpi_version}/gtl/lib -Wl,-rpath=/opt/cray/pe/mpich/{mpi_version}/gtl/lib"
"""

    # Fail here with a clear message instead of returning None, which the
    # caller would pass straight to a file write.
    raise ValueError(f"No cray-mpich configuration for compiler {compiler!r}")


def rocm_config(self, rocm_version):
    """Return Spack ``packages.yaml`` text declaring the ROCm externals.

    Every package is pinned to ``rocm_version`` and rooted under
    ``/opt/rocm-<rocm_version>``.

    Args:
        rocm_version: Dotted ROCm version string, e.g. ``"5.5.1"``.

    Returns:
        The YAML document as a string.
    """
    # LLVM release recorded for each ROCm install in the per-version
    # package files this template replaces.
    llvm_versions = {"5.4.3": "15.0.0", "5.5.1": "16.0.0"}
    # NOTE(review): any other ROCm version (e.g. 6.2.4) falls back to the
    # previously hard-coded 15.0.0 -- confirm which LLVM it actually ships.
    llvm_version = llvm_versions.get(rocm_version, "15.0.0")

    # NOTE(review): no package is marked ``buildable: false`` here -- confirm
    # that letting Spack build these packages is intended.
    template = """\
packages:
  hipfft:
    externals:
    - spec: hipfft@{x}
      prefix: /opt/rocm-{x}
  rocfft:
    externals:
    - spec: rocfft@{x}
      prefix: /opt/rocm-{x}
  rocprim:
    externals:
    - spec: rocprim@{x}
      prefix: /opt/rocm-{x}
  rocrand:
    externals:
    - spec: rocrand@{x}
      prefix: /opt/rocm-{x}/hiprand
  rocsparse:
    externals:
    - spec: rocsparse@{x}
      prefix: /opt/rocm-{x}
  rocthrust:
    externals:
    - spec: rocthrust@{x}
      prefix: /opt/rocm-{x}
  hip:
    externals:
    - spec: hip@{x}
      prefix: /opt/rocm-{x}
  hsa-rocr-dev:
    externals:
    - spec: hsa-rocr-dev@{x}
      prefix: /opt/rocm-{x}
  comgr:
    externals:
    - spec: comgr@{x}
      prefix: /opt/rocm-{x}/
  hipsparse:
    externals:
    - spec: hipsparse@{x}
      prefix: /opt/rocm-{x}
  hipblas:
    externals:
    - spec: hipblas@{x}
      prefix: /opt/rocm-{x}/
  hsakmt-roct:
    externals:
    - spec: hsakmt-roct@{x}
      prefix: /opt/rocm-{x}/
  roctracer-dev-api:
    externals:
    - spec: roctracer-dev-api@{x}
      prefix: /opt/rocm-{x}/
  rocminfo:
    externals:
    - spec: rocminfo@{x}
      prefix: /opt/rocm-{x}/
  llvm:
    externals:
    - spec: llvm@{llvm}-{x}
      prefix: /opt/rocm-{x}/llvm
  llvm-amdgpu:
    externals:
    - spec: llvm-amdgpu@{x}
      prefix: /opt/rocm-{x}/llvm
  rocblas:
    externals:
    - spec: rocblas@{x}
      prefix: /opt/rocm-{x}
  rocsolver:
    externals:
    - spec: rocsolver@{x}
      prefix: /opt/rocm-{x}
"""
    return template.format(x=rocm_version, llvm=llvm_version)


def rocm_cce_compiler_cfg(self, rocm_version, cce_version):
    """Return Spack ``compilers.yaml`` text for the CCE and ROCm compilers.

    Two compiler entries are produced: ``cce@<cce_version>-rocm<rocm_version>``
    and ``rocmcc@<rocm_version>``, with paths under ``/opt/cray/pe/cce`` and
    ``/opt/rocm-<rocm_version>`` respectively.

    Args:
        rocm_version: Dotted ROCm version string, e.g. ``"5.5.1"``.
        cce_version: Full CCE version string, e.g. ``"16.0.0"``.

    Returns:
        The YAML document as a string.
    """
    template = """\
compilers:
- compiler:
    spec: cce@{y}-rocm{x}
    paths:
      cc: /opt/cray/pe/cce/{y}/bin/craycc
      cxx: /opt/cray/pe/cce/{y}/bin/crayCC
      f77: /opt/cray/pe/cce/{y}/bin/crayftn
      fc: /opt/cray/pe/cce/{y}/bin/crayftn
    flags:
      cflags: -g -O2
      cxxflags: -g -O2 -std=c++17
      fflags: -g -O2 -hnopattern
      ldflags: -ldl
    operating_system: rhel8
    target: x86_64
    modules: []
    environment:
      prepend_path:
        LD_LIBRARY_PATH: "/opt/cray/pe/cce/{y}/cce/x86_64/lib:/opt/rocm-{x}/lib"
    extra_rpaths: [/opt/cray/pe/cce/{y}/cce/x86_64/lib/, /opt/cray/pe/gcc-libs/, /opt/rocm-{x}/lib]
- compiler:
    spec: rocmcc@{x}
    paths:
      cc: /opt/rocm-{x}/bin/amdclang
      cxx: /opt/rocm-{x}/bin/amdclang++
      f77: /opt/rocm-{x}/bin/amdflang
      fc: /opt/rocm-{x}/bin/amdflang
    flags:
      cflags: -g -O2
      cxxflags: -g -O2
    operating_system: rhel8
    target: x86_64
    modules: []
    environment:
      set:
        RFE_811452_DISABLE: '1'
      append_path:
        LD_LIBRARY_PATH: /opt/cray/pe/gcc-libs
      prepend_path:
        LD_LIBRARY_PATH: "/opt/cray/pe/cce/{y}/cce/x86_64/lib:/opt/cray/pe/pmi/6.1.12/lib"
        LIBRARY_PATH: /opt/rocm-{x}/lib
    extra_rpaths:
    - /opt/rocm-{x}/lib
    - /opt/cray/pe/gcc-libs
    - /opt/cray/pe/cce/{y}/cce/x86_64/lib
"""
    return template.format(x=rocm_version, y=cce_version)
values=("clang-ibm", "xl", "xl-gcc", "clang"), # values=("11-8-0", "10-1-243"),