From 3242d971bf7c376ed408704e8faac748914284b2 Mon Sep 17 00:00:00 2001
From: Petr Stodulka
Date: Fri, 3 May 2024 14:44:51 +0200
Subject: [PATCH] mount /usr: Implement try-sleep loop - add time for storage
 initialisation

This problem is typical for SAN + FC, when the storage sometimes needs
more time for the initialisation. Implemented a try-sleep loop.
Retry the activation of the storage + /usr mounting in 15s.
The loop makes up to 12 attempts (11 retries), so the total waiting time
is up to 165s right now for the activation.

Note that this is not a proper solution for the storage initialisation,
however we have discovered some obstacles in the bootup process
that prevent us from doing it correctly as we would like to.

Given the limited time, we are going to deliver this solution, which
should improve the experience and should be safe to not cause
regressions for already working functionality. We expect to provide
a better solution for newer upgrade paths in the future (IPU 8->9 and
newer).

jira: https://issues.redhat.com/browse/RHEL-3344
---
 .../dracut/85sys-upgrade-redhat/mount_usr.sh  | 101 ++++++++++++++----
 1 file changed, 85 insertions(+), 16 deletions(-)

diff --git a/repos/system_upgrade/common/actors/commonleappdracutmodules/files/dracut/85sys-upgrade-redhat/mount_usr.sh b/repos/system_upgrade/common/actors/commonleappdracutmodules/files/dracut/85sys-upgrade-redhat/mount_usr.sh
index 3c52652f32..8c97bef418 100755
--- a/repos/system_upgrade/common/actors/commonleappdracutmodules/files/dracut/85sys-upgrade-redhat/mount_usr.sh
+++ b/repos/system_upgrade/common/actors/commonleappdracutmodules/files/dracut/85sys-upgrade-redhat/mount_usr.sh
@@ -22,6 +22,18 @@ filtersubvol() {
 
 mount_usr()
 {
+    #
+    # mount_usr [true | false]
+    # Pass "true" only on the last attempt: a failed mount then drops to
+    # the emergency shell via action_on_fail.
+    #
+    # Return 0 when everything is all right
+    # In case of failure and /usr has been detected:
+    #   return 2 when called with "true" (and emergency shell has been invoked)
+    #            (note: possibly it's nonsense, but to be sure..)
+    #   return 1 otherwise
+    #
+    local _drop_to_shell_on_fail="$1"
     # check, if we have to mount the /usr filesystem
     while read -r _dev _mp _fs _opts _freq _passno; do
         [ "${_dev%%#*}" != "$_dev" ] && continue
@@ -60,25 +72,82 @@ mount_usr()
         fi
     done < "${NEWROOT}/etc/fstab" >> /etc/fstab
 
-    if [ "$_usr_found" != "" ]; then
-        info "Mounting /usr with -o $_opts"
-        mount "${NEWROOT}/usr" 2>&1 | vinfo
-        mount -o remount,rw "${NEWROOT}/usr"
+    if [ "$_usr_found" = "" ]; then
+        # nothing to do
+        return 0
+    fi
 
-        if ! ismounted "${NEWROOT}/usr"; then
-            warn "Mounting /usr to ${NEWROOT}/usr failed"
-            warn "*** Dropping you to a shell; the system will continue"
-            warn "*** when you leave the shell."
-            action_on_fail
-        fi
+    info "Mounting /usr with -o $_opts"
+    mount "${NEWROOT}/usr" 2>&1 | vinfo
+    mount -o remount,rw "${NEWROOT}/usr"
+
+    # ismounted is a command: test its exit status directly, not inside [ ]
+    if ismounted "${NEWROOT}/usr"; then
+        # success!!
+        return 0
     fi
+
+    if [ "$_drop_to_shell_on_fail" = "true" ]; then
+        warn "Mounting /usr to ${NEWROOT}/usr failed"
+        warn "*** Dropping you to a shell; the system will continue"
+        warn "*** when you leave the shell."
+        action_on_fail
+        return 2
+    fi
+
+    return 1
 }
 
-if [ -f "${NEWROOT}/etc/fstab" ]; then
-    # In case we have the LVM command available try make it activate all partitions
-    if command -v lvm 2>/dev/null 1>/dev/null; then
-        lvm vgchange -a y
+
+#
+# try_to_mount_usr [true | false]
+# Activate LVM volume groups (when the lvm command is available) and call
+# mount_usr with the same argument. Pass "true" only on the last attempt.
+#
+# Return 1 when ${NEWROOT}/etc/fstab is missing, or when the LVM activation
+# fails and this is not the last attempt; otherwise the mount_usr status.
+#
+try_to_mount_usr() {
+    if [ ! -f "${NEWROOT}/etc/fstab" ]; then
+        warn "File ${NEWROOT}/etc/fstab doesn't exist."
+        return 1
+    fi
+
+    # In case we have the LVM command available try make it activate all partitions
+    if command -v lvm 2>/dev/null 1>/dev/null; then
+        lvm vgchange -a y || {
+            warn "Detected problem when tried to activate LVM VG."
+            if [ "$1" != "true" ]; then
+                # this is not last execution, retry
+                return 1
+            fi
+            # NOTE(pstodulk):
+            # last execution ($1 = "true"), call mount_usr anyway
+            # I am not 100% about lvm vgchange exit codes and I am aware of
+            # possible warnings, in this last run, let's keep it on mount_usr
+            # anyway..
+        }
+    fi
+
+    mount_usr "$1"
+}
+
+_sleep_timeout=15
+_last_attempt="false"
+# explicit list instead of {0..11}: brace expansion is not POSIX sh
+for i in 0 1 2 3 4 5 6 7 8 9 10 11; do
+    # only the last attempt is allowed to drop to the emergency shell
+    [ "$i" -eq 11 ] && _last_attempt="true"
+    try_to_mount_usr "$_last_attempt" && break
+    if [ "$_last_attempt" = "true" ]; then
+        warn "The last attempt to initialize storage has not been successful."
+        warn "Unknown state of the storage. It is possible that upgrade will be stopped."
+        break
     fi
 
-    mount_usr
-fi
+    # something is wrong. In some cases, storage needs more time for the
+    # initialisation - especially in case of SAN. try again
+    warn "Failed attempt to initialize the storage. Retry in $_sleep_timeout seconds. Attempt: $i of 11"
+    sleep $_sleep_timeout
+done
+