diff --git a/images/accounting/slurmdbd_entrypoint.sh b/images/accounting/slurmdbd_entrypoint.sh index b16eec9c..c7899f11 100755 --- a/images/accounting/slurmdbd_entrypoint.sh +++ b/images/accounting/slurmdbd_entrypoint.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e # Exit immediately if any command returns a non-zero error code +set -x # Print actual command before executing it echo "Bind-mount REST JWT key from K8S secret" touch /var/spool/slurmdbd/jwt_hs256.key diff --git a/images/common/scripts/install_docker.sh b/images/common/scripts/install_docker.sh index 1e996b5f..3e8b25c4 100644 --- a/images/common/scripts/install_docker.sh +++ b/images/common/scripts/install_docker.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e # Exit immediately if any command returns a non-zero error code +set -x # Print actual command before executing it # Add Docker's official GPG key apt update -y diff --git a/images/common/scripts/install_docker_cli.sh b/images/common/scripts/install_docker_cli.sh index 84d1a059..9b79a195 100644 --- a/images/common/scripts/install_docker_cli.sh +++ b/images/common/scripts/install_docker_cli.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e # Exit immediately if any command returns a non-zero error code +set -x # Print actual command before executing it # Add Docker's official GPG key apt update -y diff --git a/images/controller/slurmctld_entrypoint.sh b/images/controller/slurmctld_entrypoint.sh index 9d24ce1a..8976bce0 100755 --- a/images/controller/slurmctld_entrypoint.sh +++ b/images/controller/slurmctld_entrypoint.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e # Exit immediately if any command returns a non-zero error code +set -x # Print actual command before executing it echo "Link users from jail" ln -s /mnt/jail/etc/passwd /etc/passwd diff --git a/images/jail/scripts/createuser.sh b/images/jail/scripts/createuser.sh index 87dd2761..0d9564db 100755 --- a/images/jail/scripts/createuser.sh +++ b/images/jail/scripts/createuser.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e +set -x # Print actual command before executing it if [[ $# -eq 0 ]] || [[ "$*" == *"-h"* ]] || [[ "$*" == *"--help"* ]]; then echo "Usage: createuser [--with-password] [--without-sudo] [--without-docker] []" diff --git a/images/login/sshd_entrypoint.sh b/images/login/sshd_entrypoint.sh index 90da1629..e3419512 100755 --- a/images/login/sshd_entrypoint.sh +++ b/images/login/sshd_entrypoint.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e # Exit immediately if any command returns a non-zero error code +set -x # Print actual command before executing it echo "Link users from jail" ln -s /mnt/jail/etc/passwd /etc/passwd diff --git a/images/munge/munge_entrypoint.sh b/images/munge/munge_entrypoint.sh index 18ed87cf..00b76680 100644 --- a/images/munge/munge_entrypoint.sh +++ b/images/munge/munge_entrypoint.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e # Exit immediately if any command returns a non-zero error code +set -x # Print actual command before executing it echo "Link users from jail" ln -s /mnt/jail/etc/passwd /etc/passwd diff --git a/images/nccl_benchmark/nccl_benchmark_entrypoint.sh b/images/nccl_benchmark/nccl_benchmark_entrypoint.sh index a016e16a..a8458f6b 100644 --- a/images/nccl_benchmark/nccl_benchmark_entrypoint.sh +++ b/images/nccl_benchmark/nccl_benchmark_entrypoint.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e # Exit immediately if any command returns a non-zero error code +set -x # Print actual command before executing it echo "Link users from jail" ln -s /mnt/jail/etc/passwd /etc/passwd diff --git a/images/nccl_benchmark/scripts/srun_perf.sh b/images/nccl_benchmark/scripts/srun_perf.sh index f02c280f..29bc880a 100644 --- a/images/nccl_benchmark/scripts/srun_perf.sh +++ b/images/nccl_benchmark/scripts/srun_perf.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e +set -x # Print actual command before executing it while getopts ":b:e:f:g:t:l:d:u:h:p:n:s:m:w:c:q:" opt; do case ${opt} in diff --git a/images/restd/slurmrestd_entrypoint.sh b/images/restd/slurmrestd_entrypoint.sh index f742dcd2..464e34b0 100755 --- a/images/restd/slurmrestd_entrypoint.sh +++ b/images/restd/slurmrestd_entrypoint.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e # Exit immediately if any command returns a non-zero error code +set -x # Print actual command before executing it echo "Bind-mount slurm configs from K8S config map" for file in /mnt/slurm-configs/*; do diff --git a/images/worker/scripts/gpu_healthcheck.sh b/images/worker/scripts/gpu_healthcheck.sh index c806f2e9..44ece149 100644 --- a/images/worker/scripts/gpu_healthcheck.sh +++ b/images/worker/scripts/gpu_healthcheck.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e +set -x # Print actual command before executing it # Run GPU healthcheck output=$(/usr/bin/nvidia-smi 2>&1) diff --git a/images/worker/slurmd_entrypoint.sh b/images/worker/slurmd_entrypoint.sh index 4a506ff7..36ba140c 100755 --- a/images/worker/slurmd_entrypoint.sh +++ b/images/worker/slurmd_entrypoint.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e # Exit immediately if any command returns a non-zero error code +set -x # Print actual command before executing it echo "Evaluate variables in the Slurm node 'Extra' field" evaluated_extra=$(eval echo "$SLURM_NODE_EXTRA") diff --git a/images/worker/supervisord_entrypoint.sh b/images/worker/supervisord_entrypoint.sh index 4e82d108..f80dacb2 100644 --- a/images/worker/supervisord_entrypoint.sh +++ b/images/worker/supervisord_entrypoint.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e # Exit immediately if any command returns a non-zero error code +set -x # Print actual command before executing it echo "Starting slurmd entrypoint script" if [ -n "${CGROUP_V2}" ]; then @@ -8,7 +9,7 @@ if [ -n "${CGROUP_V2}" ]; then if [ -n "${CGROUP_PATH}" ]; then echo "cgroup v2 detected, creating cgroup for ${CGROUP_PATH}" - mkdir -p /sys/fs/cgroup/${CGROUP_PATH}/../system.slice + mkdir -p /sys/fs/cgroup/"${CGROUP_PATH}"/../system.slice # TODO: uncomment this line when 24.11 will be tested. It is OOMKillStep for taskPluginParam # echo "1" > /sys/fs/cgroup/${CGROUP_PATH}/../system.slice/memory.oom.group else