Skip to content

Commit

Permalink
WIP install 8/n: run installer, go see inside and detect success
Browse files Browse the repository at this point in the history
FIXME:
- specification of answerfile contents and dom0 cmdline is tightly linked,
  e.g. we rely on atexit=shell
- repeatedly polling with grep is junk
  • Loading branch information
ydirson committed May 15, 2024
1 parent 499b927 commit 80f791d
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 2 deletions.
18 changes: 16 additions & 2 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,20 +268,33 @@ def iso_remaster():
SOURCE_ISO = "/home/user/iso/xcp-ng-8.2.1-20231130.iso" # FIXME dict in data.py
ANSWERFILE_URL = "http://pxe/configs/custom/ydi/install-8.2-uefi-iso-ext.xml" # FIXME

from data import ISOSR_SRV, ISOSR_PATH, TOOLS
from data import ISOSR_SRV, ISOSR_PATH, TEST_SSH_PUBKEY, TOOLS
assert "iso-remaster" in TOOLS
iso_remaster = TOOLS["iso-remaster"]
assert os.access(iso_remaster, os.X_OK)

with tempfile.TemporaryDirectory() as isotmp:
remastered_iso = os.path.join(isotmp, "image.iso")
iso_patcher_script = os.path.join(isotmp, "iso-patcher")
img_patcher_script = os.path.join(isotmp, "img-patcher")

logging.info("Remastering %s to %s", SOURCE_ISO, remastered_iso)

# generate install.img-patcher script
with open(img_patcher_script, "xt") as patcher_fd:
# FIXME insert answerfile in there
print(f"""#!/bin/bash
set -ex
INSTALLIMG="$1"
mkdir -p "$INSTALLIMG/root/.ssh"
echo "{TEST_SSH_PUBKEY}" > "$INSTALLIMG/root/.ssh/authorized_keys"
""",
file=patcher_fd)
os.chmod(patcher_fd.fileno(), 0o755)

# generate iso-patcher script
with open(iso_patcher_script, "xt") as patcher_fd:
passwd = "passw0rd" # FIXME hash
passwd = "passw0rd" # FIXME use invalid hash
print(f"""#!/bin/bash
set -ex
ISODIR="$1"
Expand All @@ -297,6 +310,7 @@ def iso_remaster():

# do remaster
local_cmd([iso_remaster,
"--install-patcher", img_patcher_script,
"--iso-patcher", iso_patcher_script,
SOURCE_ISO, remastered_iso
])
Expand Down
3 changes: 3 additions & 0 deletions data.py-dist
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
HOST_DEFAULT_USER = "root"
HOST_DEFAULT_PASSWORD = ""

# Public key for a private key available to the test runner
TEST_SSH_PUBKEY = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDKz9uQOoxq6Q0SQ0XTzQHhDolvuo/7EyrDZsYQbRELhcPJG8MT/o5u3HyJFhIP2+HqBSXXgmqRPJUkwz9wUwb2sUwf44qZm/pyPUWOoxyVtrDXzokU/uiaNKUMhbnfaXMz6Ogovtjua63qld2+ZRXnIgrVtYKtYBeu/qKGVSnf4FTOUKl1w3uKkr59IUwwAO8ay3wVnxXIHI/iJgq6JBgQNHbn3C/SpYU++nqL9G7dMyqGD36QPFuqH/cayL8TjNZ67TgAzsPX8OvmRSqjrv3KFbeSlpS/R4enHkSemhgfc8Z2f49tE7qxWZ6x4Uyp5E6ur37FsRf/tEtKIUJGMRXN XCP-ng CI"

# The following prefix will be added to the `name-label` parameter of XAPI objects
# that the tests will create or import, such as VMs and SRs.
# Default value: [your login/user]
Expand Down
138 changes: 138 additions & 0 deletions tests/install/test_install.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
import logging
import os
import pytest
import time

from lib import commands, pxe
from lib.common import wait_for
from lib.host import Host
from lib.pool import Pool

@pytest.mark.vm_definitions(
dict(name="vm 1",
Expand All @@ -21,5 +28,136 @@ def test_install_nested_821_uefi(self, create_vms, iso_remaster):
assert len(create_vms) == 1
host_vm = create_vms[0]

vif = host_vm.vifs()[0]
mac_address = vif.param_get('MAC')
logging.info("Host VM has MAC %s", mac_address)

host_vm.create_cd_vbd("xvdd")
host_vm.insert_cd(iso_remaster)

host_vm.start()
wait_for(host_vm.is_running, "Wait for host VM running")
try:
# catch host-vm IP address
wait_for(lambda: pxe.arp_addresses_for(mac_address),
"Wait for DHCP server to see Host VM in ARP tables",
timeout_secs=10*60)
ips = pxe.arp_addresses_for(mac_address)
logging.info("Host VM has IPs %s", ips)
assert len(ips) == 1
host_vm.ip = ips[0]

host_vm.ssh(["ls"])
logging.info("ssh works")

# wait for "yum install" phase to finish
wait_for(lambda: host_vm.ssh(["grep",
"'DISPATCH: NEW PHASE: Completing installation'",
"/tmp/install-log"],
check=False, simple_output=False,
).returncode == 0,
"Wait for rpm installation to succeed",
timeout_secs=40*60) # FIXME too big

# wait for install to finish
wait_for(lambda: host_vm.ssh(["grep",
"'The installation completed successfully'",
"/tmp/install-log"],
check=False, simple_output=False,
).returncode == 0,
"Wait for installation to succeed",
timeout_secs=40*60) # FIXME too big

# powercycle, catch any change of IP
logging.info("Rebooting Host VM after successful installation")
try:
# use "poweroff" because "reboot" would cause ARP and
# SSH to be checked before host is down, and require
# ssh retries
host_vm.ssh(["poweroff"])
except commands.SSHCommandFailed as e:
# ignore connection closed by the poweroff
if e.returncode == 255 and "closed by remote host" in e.stdout:
logging.info("sshd closed the connection")
pass
else:
raise
wait_for(host_vm.is_halted, "Wait for host VM halted")
host_vm.eject_cd()

# FIXME: make a snapshot here

# FIXME: evict MAC from ARP cache first?
host_vm.start()
wait_for(host_vm.is_running, "Wait for host VM running")

ips = pxe.arp_addresses_for(mac_address)
logging.info("Host VM has IPs %s", ips)
assert len(ips) == 1
host_vm.ip = ips[0]

wait_for(lambda: not os.system(f"nc -zw5 {host_vm.ip} 22"),
"Wait for ssh back up on Host VM", retry_delay_secs=5)

# pool master must be reachable here
# FIXME: not sure why we seem to need this, while port 22 has been seen open
tries = 5
while True:
try:
pool = Pool(host_vm.ip)
except commands.SSHCommandFailed as e:
if "Connection refused" not in e.stdout:
raise
tries -= 1
if tries:
logging.warning("retrying connection to pool master")
time.sleep(2)
continue
# retries failed
raise
# it worked!
break

# wait for XAPI
# FIXME: flaky, must check logs extraction on failure
for service in ["control-domain-params-init",
"network-init",
"storage-init",
"generate-iscsi-iqn",
"create-guest-templates",
]:
try:
wait_for(lambda: pool.master.ssh(["test", "-e", f"/var/lib/misc/ran-{service}"],
check=False, simple_output=False,
).returncode == 0,
f"Wait for ran-{service} stamp")
except TimeoutError:
logging.warning("investigating lack of ran-{service} stamp")
out = pool.master.ssh(["systemctl", "status", service], check=False)
logging.warning("service status: %s", out)
out = pool.master.ssh(["grep", "-r", service, "/var/log"], check=False)
logging.warning("in logs: %s", out)

wait_for(pool.master.is_enabled, "Wait for XAPI to be ready", timeout_secs=30 * 60)

logging.info("Powering off pool master")
try:
# use "poweroff" because "reboot" would cause ARP and
# SSH to be checked before host is down, and require
# ssh retries
pool.master.ssh(["poweroff"])
except commands.SSHCommandFailed as e:
# ignore connection closed by the poweroff
if e.returncode == 255 and "closed by remote host" in e.stdout:
logging.info("sshd closed the connection")
pass
else:
raise

wait_for(host_vm.is_halted, "Wait for host VM halted")

except Exception as e:
logging.critical("caught exception %s", e)
#wait_for(lambda: False, 'Wait "forever"', timeout_secs=100*60)
host_vm.shutdown(force=True)
raise

0 comments on commit 80f791d

Please sign in to comment.