From a6637fdee719a3d8e5aae209fa593aebdccef191 Mon Sep 17 00:00:00 2001 From: AWS ParallelCluster user Date: Mon, 20 Nov 2023 18:29:48 +0000 Subject: [PATCH 1/3] Adding support for AWS x86 ParallelCluster 3.7.2 w/EFA --- .../AWS-x86-ParallelCluster-3.7.2/README.txt | 29 +++++++++++++++++++ .../auxiliary_software_files/compilers.yaml | 14 +++++++++ .../auxiliary_software_files/packages.yaml | 27 +++++++++++++++++ .../AWS-x86-ParallelCluster-3.7.2/spack.yaml | 12 ++++++++ .../variables.yaml | 7 +++++ 5 files changed, 89 insertions(+) create mode 100644 configs/AWS-x86-ParallelCluster-3.7.2/README.txt create mode 100644 configs/AWS-x86-ParallelCluster-3.7.2/auxiliary_software_files/compilers.yaml create mode 100644 configs/AWS-x86-ParallelCluster-3.7.2/auxiliary_software_files/packages.yaml create mode 100644 configs/AWS-x86-ParallelCluster-3.7.2/spack.yaml create mode 100644 configs/AWS-x86-ParallelCluster-3.7.2/variables.yaml diff --git a/configs/AWS-x86-ParallelCluster-3.7.2/README.txt b/configs/AWS-x86-ParallelCluster-3.7.2/README.txt new file mode 100644 index 000000000..165d7a18f --- /dev/null +++ b/configs/AWS-x86-ParallelCluster-3.7.2/README.txt @@ -0,0 +1,29 @@ +AWS x86 ParallelCluster 3.7.2 +----------------------------- + +This config should work on any AWS x86 ParallelCluster 3.7.2 instance with the +following caveats: + +1) All compute instances must be x86 and EFA enabled. Supported instances types + can be found here: + + https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa.html#efa-instance-types + +2) Any OS supported by ParallelCluster 3.7.2 should work, but only Amazon + Linux 2 has been tested + +3) ParallelCluster does not install optimized versions of BLAS/LAPACK. This + config uses the generic versions installed via: + + sudo yum install lapack + +3) OpenMPI is the only supported MPI flavor. InelMPI is not yet supported. + + OpenMPI is running in verbose mode so the user than confirm that EFA is + being used when running experiments. 
A line similar to the following in + slurm-NNN.out confirms EFA is being used: + + mtl_ofi_component.c:362: mtl:ofi:provider: rdmap0s6-rdm + + This debugging output can be silenced by removing the env variable + 'OMPI_MCA_mtl_base_verbose=100' from the srun line in variables.yaml. diff --git a/configs/AWS-x86-ParallelCluster-3.7.2/auxiliary_software_files/compilers.yaml b/configs/AWS-x86-ParallelCluster-3.7.2/auxiliary_software_files/compilers.yaml new file mode 100644 index 000000000..b6ea01cd9 --- /dev/null +++ b/configs/AWS-x86-ParallelCluster-3.7.2/auxiliary_software_files/compilers.yaml @@ -0,0 +1,14 @@ +compilers: +- compiler: + spec: gcc@7.3.1 + paths: + cc: /usr//bin/gcc + cxx: /usr/bin/g++ + f77: /usr/bin/gfortran + fc: /usr/bin/gfortran + flags: {} + operating_system: alinux2 + target: x86_64 + modules: [] + environment: {} + extra_rpaths: [] diff --git a/configs/AWS-x86-ParallelCluster-3.7.2/auxiliary_software_files/packages.yaml b/configs/AWS-x86-ParallelCluster-3.7.2/auxiliary_software_files/packages.yaml new file mode 100644 index 000000000..248ea0e2b --- /dev/null +++ b/configs/AWS-x86-ParallelCluster-3.7.2/auxiliary_software_files/packages.yaml @@ -0,0 +1,27 @@ +packages: + tar: + externals: + - spec: tar@1.26 + prefix: /usr + buildable: false + gmake: + externals: + - spec: gmake@3.8.2 + prefix: /usr + blas: + externals: + - spec: blas@3.4.2 + prefix: /usr + buildable: false + lapack: + externals: + - spec: lapack@3.4.2 + prefix: /usr + buildable: false + mpi: + externals: + - spec: openmpi@4.1.5-gcc731 + prefix: /opt/amazon/openmpi + buildable: false + extra_attributes: + ldflags: "-L/opt/amazon/openmpi/lib -lmpi" diff --git a/configs/AWS-x86-ParallelCluster-3.7.2/spack.yaml b/configs/AWS-x86-ParallelCluster-3.7.2/spack.yaml new file mode 100644 index 000000000..69c35d334 --- /dev/null +++ b/configs/AWS-x86-ParallelCluster-3.7.2/spack.yaml @@ -0,0 +1,12 @@ +spack: + packages: + default-compiler: + spack_spec: gcc@7.3.1 + default-mpi: + spack_spec: 
openmpi@4.1.5-gcc731 + compiler-gcc: + spack_spec: gcc@7.3.1 + lapack: + spack_spec: lapack@3.4.2 + mpi-gcc: + spack_spec: openmpi@4.1.5-gcc731 diff --git a/configs/AWS-x86-ParallelCluster-3.7.2/variables.yaml b/configs/AWS-x86-ParallelCluster-3.7.2/variables.yaml new file mode 100644 index 000000000..b8a404d5b --- /dev/null +++ b/configs/AWS-x86-ParallelCluster-3.7.2/variables.yaml @@ -0,0 +1,7 @@ +variables: + batch_time: '02:00' + mpi_command: 'srun -N {n_nodes} -n {n_ranks} --mpi=pmix --export=ALL,FI_EFA_USE_DEVICE_RDMA=1,FI_PROVIDER="efa",OMPI_MCA_mtl_base_verbose=100' + batch_submit: 'sbatch {execute_experiment}' + batch_nodes: '#SBATCH -N {n_nodes}' + batch_ranks: '#SBATCH -n {n_ranks}' + batch_timeout: '#SBATCH -t {batch_time}:00' From d35a7f3dd86e54ad424aa1b0f81e44a556317af1 Mon Sep 17 00:00:00 2001 From: pearce8 Date: Tue, 28 Nov 2023 08:09:52 -0600 Subject: [PATCH 2/3] Update compilers.yaml --- .../auxiliary_software_files/compilers.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/AWS-x86-ParallelCluster-3.7.2/auxiliary_software_files/compilers.yaml b/configs/AWS-x86-ParallelCluster-3.7.2/auxiliary_software_files/compilers.yaml index b6ea01cd9..05b641ee7 100644 --- a/configs/AWS-x86-ParallelCluster-3.7.2/auxiliary_software_files/compilers.yaml +++ b/configs/AWS-x86-ParallelCluster-3.7.2/auxiliary_software_files/compilers.yaml @@ -2,7 +2,7 @@ compilers: - compiler: spec: gcc@7.3.1 paths: - cc: /usr//bin/gcc + cc: /usr/bin/gcc cxx: /usr/bin/g++ f77: /usr/bin/gfortran fc: /usr/bin/gfortran From 04fcb63b31f3039866e226fa131f7d35b8d4aac7 Mon Sep 17 00:00:00 2001 From: pearce8 Date: Tue, 28 Nov 2023 08:11:45 -0600 Subject: [PATCH 3/3] Update README.txt --- configs/AWS-x86-ParallelCluster-3.7.2/README.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/AWS-x86-ParallelCluster-3.7.2/README.txt b/configs/AWS-x86-ParallelCluster-3.7.2/README.txt index 165d7a18f..dea80ad5c 100644 --- 
a/configs/AWS-x86-ParallelCluster-3.7.2/README.txt +++ b/configs/AWS-x86-ParallelCluster-3.7.2/README.txt @@ -4,7 +4,7 @@ AWS x86 ParallelCluster 3.7.2 This config should work on any AWS x86 ParallelCluster 3.7.2 instance with the following caveats: -1) All compute instances must be x86 and EFA enabled. Supported instances types +1) All compute instances must be x86 and EFA enabled. Supported instance types can be found here: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa.html#efa-instance-types @@ -17,7 +17,7 @@ following caveats: sudo yum install lapack -3) OpenMPI is the only supported MPI flavor. InelMPI is not yet supported. +4) OpenMPI is the only supported MPI flavor. IntelMPI is not yet supported. OpenMPI is running in verbose mode so the user than confirm that EFA is being used when running experiments. A line similar to the following in