diff --git a/common/src/types.rs b/common/src/types.rs
index f9ea79e..84ccdae 100644
--- a/common/src/types.rs
+++ b/common/src/types.rs
@@ -149,7 +149,12 @@ pub struct UnitStatus {
 
 impl UnitStatus {
     pub fn is_running(&self) -> bool {
-        self.active_state == "active"
+        self.active_state == "active" && self.load_state == "loaded" && self.sub_state == "running"
+    }
+    pub fn is_exitted(&self) -> bool {
+        self.active_state == "inactive"
+            && self.load_state == "not-found"
+            && self.sub_state == "dead"
     }
 }
 
diff --git a/nixos/tests/admin.nix b/nixos/tests/admin.nix
index b783882..8ba7825 100644
--- a/nixos/tests/admin.nix
+++ b/nixos/tests/admin.nix
@@ -200,7 +200,10 @@ in
                   admin = adminSettings;
                   tls = mkTls "chromium-vm";
                   applications = lib.mkForce (
-                    builtins.toJSON { "foot" = "/run/current-system/sw/bin/run-waypipe ${pkgs.foot}/bin/foot"; }
+                    builtins.toJSON {
+                      "foot" = "/run/current-system/sw/bin/run-waypipe ${pkgs.foot}/bin/foot";
+                      "clearexit" = "/run/current-system/sw/bin/sleep 5";
+                    }
                   );
                 };
               };
@@ -288,10 +291,23 @@ in
               swaymsg("exec ssh -R /tmp/vsock:/tmp/vsock -f -N ${addrs.appvm}")
               time.sleep(5) # Give ssh some time to setup remote socket
 
-              #swaymsg("exec run-waypipe foot")
-              print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start foot"))
-              time.sleep(10) # Give few seconds to application to spin up
-              wait_for_window("ghaf@appvm")
+              with subtest("Clean run"):
+                  print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start foot"))
+                  time.sleep(10) # Give few seconds to application to spin up
+                  wait_for_window("ghaf@appvm")
+
+              with subtest("crash and restart"):
+                  # Crash application
+                  appvm.succeed("pkill foot")
+                  time.sleep(10)
+                  # .. then ask to restart
+                  print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start foot"))
+                  wait_for_window("ghaf@appvm")
+
+              with subtest("clear exit and restart"):
+                  print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start --vm foot-vm clearexit"))
+                  time.sleep(20) # Give few seconds to application to spin up, exit, then start it again
+                  print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start --vm foot-vm clearexit"))
             '';
           };
         };
diff --git a/src/admin/server.rs b/src/admin/server.rs
index 6ccd5ee..0b23bb1 100644
--- a/src/admin/server.rs
+++ b/src/admin/server.rs
@@ -160,7 +160,9 @@ impl AdminServiceImpl {
     pub async fn handle_error(&self, entry: RegistryEntry) -> anyhow::Result<()> {
         match (entry.r#type.vm, entry.r#type.service) {
             (VmType::AppVM, ServiceType::App) => {
-                self.registry.deregister(&entry.name)?;
+                if entry.status.is_exitted() {
+                    self.registry.deregister(&entry.name)?;
+                }
                 Ok(())
             }
             (VmType::AppVM, ServiceType::Mgr) | (VmType::SysVM, ServiceType::Mgr) => {
@@ -184,11 +186,7 @@ impl AdminServiceImpl {
             debug!("Monitoring {}...", &entry.name);
             match self.get_remote_status(&entry).await {
                 Err(err) => {
-                    error!(
-                        "could not get status of unit {}: {}",
-                        entry.name.clone(),
-                        err
-                    );
+                    error!("could not get status of unit {}: {}", &entry.name, err);
                     self.handle_error(entry)
                         .await
                         .with_context(|| "during handle error")?