From d4487db18301492c3fddd536d4d25f008d728185 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Tue, 12 Nov 2024 22:18:21 +0000 Subject: [PATCH] handle absence of specs during migration --- bin/propolis-cli/src/main.rs | 9 +- bin/propolis-server/src/lib/vm/mod.rs | 128 +++++++++++++++++--------- crates/propolis-api-types/src/lib.rs | 9 +- openapi/propolis-server.json | 38 +++++++- phd-tests/tests/src/cpuid.rs | 11 ++- phd-tests/tests/src/smoke.rs | 8 +- 6 files changed, 151 insertions(+), 52 deletions(-) diff --git a/bin/propolis-cli/src/main.rs b/bin/propolis-cli/src/main.rs index ba00f0a87..152e95178 100644 --- a/bin/propolis-cli/src/main.rs +++ b/bin/propolis-cli/src/main.rs @@ -15,7 +15,9 @@ use anyhow::{anyhow, Context}; use clap::{Parser, Subcommand}; use futures::{future, SinkExt}; use newtype_uuid::{GenericUuid, TypedUuid, TypedUuidKind, TypedUuidTag}; -use propolis_client::types::{InstanceMetadata, VersionedInstanceSpec}; +use propolis_client::types::{ + InstanceMetadata, InstanceSpecStatus, VersionedInstanceSpec, +}; use slog::{o, Drain, Level, Logger}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio_tungstenite::tungstenite::{ @@ -504,7 +506,10 @@ async fn migrate_instance( anyhow!("failed to get src instance properties") })?; let src_uuid = src_instance.properties.id; - let VersionedInstanceSpec::V0(spec) = &src_instance.spec; + let InstanceSpecStatus::Present(spec) = &src_instance.spec else { + anyhow::bail!("source instance doesn't have a spec yet"); + }; + let VersionedInstanceSpec::V0(spec) = &spec; let request = InstanceEnsureRequest { properties: InstanceProperties { diff --git a/bin/propolis-server/src/lib/vm/mod.rs b/bin/propolis-server/src/lib/vm/mod.rs index abb6cfcff..258c1976f 100644 --- a/bin/propolis-server/src/lib/vm/mod.rs +++ b/bin/propolis-server/src/lib/vm/mod.rs @@ -87,8 +87,8 @@ use oximeter::types::ProducerRegistry; use propolis_api_types::{ instance_spec::VersionedInstanceSpec, InstanceEnsureResponse, InstanceMigrateStatusResponse, InstanceMigrationStatus, InstanceProperties, - InstanceSpecGetResponse, InstanceState, InstanceStateMonitorResponse, - MigrationState, + InstanceSpecGetResponse, InstanceSpecStatus, InstanceState, + InstanceStateMonitorResponse, MigrationState, }; use slog::info; use state_driver::StateDriverOutput; @@ -204,6 +204,29 @@ struct VmInner { driver: Option>, } +/// Stores a possibly-absent instance spec with a reason for its absence. +#[derive(Clone, Debug)] +enum MaybeSpec { + Present(Spec), + + /// The spec is not known yet because the VM is initializing via live + /// migration, and the source's spec is not available yet. + WaitingForMigrationSource, +} + +impl From for InstanceSpecStatus { + fn from(value: MaybeSpec) -> Self { + match value { + MaybeSpec::WaitingForMigrationSource => { + Self::WaitingForMigrationSource + } + MaybeSpec::Present(spec) => { + Self::Present(VersionedInstanceSpec::V0(spec.into())) + } + } + } +} + /// Describes a past or future VM and its properties. struct VmDescription { /// Records the VM's last externally-visible state. @@ -212,9 +235,6 @@ struct VmDescription { /// The VM's API-level instance properties. properties: InstanceProperties, - /// The VM's last-known instance specification. - spec: Spec, - /// The runtime on which the VM's state driver is running (or on which it /// ran). tokio_rt: Option, @@ -228,17 +248,17 @@ enum VmState { /// A new state driver is attempting to initialize objects for a VM with the /// ecnlosed description. - WaitingForInit(VmDescription), + WaitingForInit { vm: VmDescription, spec: MaybeSpec }, /// The VM is active, and callers can obtain a handle to its objects. Active(active::ActiveVm), /// The previous VM is shutting down, but its objects have not been fully /// destroyed yet. - Rundown(VmDescription), + Rundown { vm: VmDescription, spec: Spec }, /// The previous VM and its objects have been cleaned up. - RundownComplete(VmDescription), + RundownComplete { vm: VmDescription, spec: MaybeSpec }, } impl std::fmt::Display for VmState { @@ -248,10 +268,10 @@ impl std::fmt::Display for VmState { "{}", match self { Self::NoVm => "NoVm", - Self::WaitingForInit(_) => "WaitingForInit", + Self::WaitingForInit { .. } => "WaitingForInit", Self::Active(_) => "Active", - Self::Rundown(_) => "Rundown", - Self::RundownComplete(_) => "RundownComplete", + Self::Rundown { .. } => "Rundown", + Self::RundownComplete { .. } => "RundownComplete", } ) } @@ -329,20 +349,26 @@ impl Vm { let state = vm.external_state_rx.borrow().clone(); Some(InstanceSpecGetResponse { properties: vm.properties.clone(), - spec: VersionedInstanceSpec::V0(spec.into()), + spec: InstanceSpecStatus::Present( + VersionedInstanceSpec::V0(spec.into()), + ), state: state.state, }) } - - // If the VM is not active yet, or there is only a - // previously-run-down VM, return the state saved in the state - // machine. - VmState::WaitingForInit(vm) - | VmState::Rundown(vm) - | VmState::RundownComplete(vm) => Some(InstanceSpecGetResponse { + VmState::WaitingForInit { vm, spec } + | VmState::RundownComplete { vm, spec } => { + Some(InstanceSpecGetResponse { + properties: vm.properties.clone(), + state: vm.external_state_rx.borrow().state, + spec: spec.clone().into(), + }) + } + VmState::Rundown { vm, spec } => Some(InstanceSpecGetResponse { properties: vm.properties.clone(), state: vm.external_state_rx.borrow().state, - spec: VersionedInstanceSpec::V0(vm.spec.clone().into()), + spec: InstanceSpecStatus::Present(VersionedInstanceSpec::V0( + spec.clone().into(), + )), }), } } @@ -359,9 +385,9 @@ impl Vm { match &guard.state { VmState::NoVm => None, VmState::Active(vm) => Some(vm.external_state_rx.clone()), - VmState::WaitingForInit(vm) - | VmState::Rundown(vm) - | VmState::RundownComplete(vm) => { + VmState::WaitingForInit { vm, .. } + | VmState::Rundown { vm, .. } + | VmState::RundownComplete { vm, .. } => { Some(vm.external_state_rx.clone()) } } @@ -386,7 +412,7 @@ impl Vm { let mut guard = self.inner.write().await; let old = std::mem::replace(&mut guard.state, VmState::NoVm); match old { - VmState::WaitingForInit(vm) => { + VmState::WaitingForInit { vm, .. } => { guard.state = VmState::Active(ActiveVm { log: log.clone(), state_driver_queue, @@ -417,8 +443,8 @@ impl Vm { let mut guard = self.inner.write().await; let old = std::mem::replace(&mut guard.state, VmState::NoVm); match old { - VmState::WaitingForInit(vm) => { - guard.state = VmState::RundownComplete(vm) + VmState::WaitingForInit { vm, spec } => { + guard.state = VmState::RundownComplete { vm, spec } } state => unreachable!( "start failures should only occur before an active VM is \ @@ -449,12 +475,14 @@ impl Vm { let spec = vm.objects().lock_shared().await.instance_spec().clone(); let ActiveVm { external_state_rx, properties, tokio_rt, .. } = vm; - guard.state = VmState::Rundown(VmDescription { - external_state_rx, - properties, + guard.state = VmState::Rundown { + vm: VmDescription { + external_state_rx, + properties, + tokio_rt: Some(tokio_rt), + }, spec, - tokio_rt: Some(tokio_rt), - }); + }; vm.services }; @@ -473,9 +501,12 @@ impl Vm { let mut guard = self.inner.write().await; let old = std::mem::replace(&mut guard.state, VmState::NoVm); let rt = match old { - VmState::Rundown(mut vm) => { + VmState::Rundown { mut vm, spec } => { let rt = vm.tokio_rt.take().expect("rundown VM has a runtime"); - guard.state = VmState::RundownComplete(vm); + guard.state = VmState::RundownComplete { + vm, + spec: MaybeSpec::Present(spec), + }; rt } state => unreachable!( @@ -551,16 +582,25 @@ impl Vm { { let mut guard = self.inner.write().await; match guard.state { - VmState::WaitingForInit(_) => { - return Err(VmError::WaitingToInitialize); + VmState::WaitingForInit { .. } => { + return Err(VmError::WaitingToInitialize) + } + VmState::Active { .. } => { + return Err(VmError::AlreadyInitialized) + } + VmState::Rundown { .. } => { + return Err(VmError::RundownInProgress) } - VmState::Active(_) => return Err(VmError::AlreadyInitialized), - VmState::Rundown(_) => return Err(VmError::RundownInProgress), _ => {} }; let properties = ensure_request.properties.clone(); - let spec = ensure_request.instance_spec.clone(); + let spec = if ensure_request.migrate.is_some() { + MaybeSpec::WaitingForMigrationSource + } else { + MaybeSpec::Present(ensure_request.instance_spec.clone()) + }; + let vm_for_driver = self.clone(); guard.driver = Some(tokio::spawn(async move { state_driver::run_state_driver( @@ -574,12 +614,14 @@ impl Vm { .await })); - guard.state = VmState::WaitingForInit(VmDescription { - external_state_rx: external_rx.clone(), - properties, + guard.state = VmState::WaitingForInit { + vm: VmDescription { + external_state_rx: external_rx.clone(), + properties, + tokio_rt: None, + }, spec, - tokio_rt: None, - }); + }; } // Wait for the state driver task to dispose of this request. diff --git a/crates/propolis-api-types/src/lib.rs b/crates/propolis-api-types/src/lib.rs index 44bd7a68d..db007667c 100644 --- a/crates/propolis-api-types/src/lib.rs +++ b/crates/propolis-api-types/src/lib.rs @@ -165,11 +165,18 @@ pub struct InstanceGetResponse { pub instance: Instance, } +#[derive(Clone, Deserialize, Serialize, JsonSchema)] +#[serde(tag = "type", content = "value")] +pub enum InstanceSpecStatus { + WaitingForMigrationSource, + Present(VersionedInstanceSpec), +} + #[derive(Clone, Deserialize, Serialize, JsonSchema)] pub struct InstanceSpecGetResponse { pub properties: InstanceProperties, pub state: InstanceState, - pub spec: VersionedInstanceSpec, + pub spec: InstanceSpecStatus, } #[derive(Clone, Deserialize, Serialize, JsonSchema)] diff --git a/openapi/propolis-server.json b/openapi/propolis-server.json index 964b630c8..aba9a1c03 100644 --- a/openapi/propolis-server.json +++ b/openapi/propolis-server.json @@ -1431,7 +1431,7 @@ "$ref": "#/components/schemas/InstanceProperties" }, "spec": { - "$ref": "#/components/schemas/VersionedInstanceSpec" + "$ref": "#/components/schemas/InstanceSpecStatus" }, "state": { "$ref": "#/components/schemas/InstanceState" @@ -1443,6 +1443,42 @@ "state" ] }, + "InstanceSpecStatus": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "WaitingForMigrationSource" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "Present" + ] + }, + "value": { + "$ref": "#/components/schemas/VersionedInstanceSpec" + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, "InstanceSpecV0": { "type": "object", "properties": { diff --git a/phd-tests/tests/src/cpuid.rs b/phd-tests/tests/src/cpuid.rs index 3ec773761..9d1a4e7c0 100644 --- a/phd-tests/tests/src/cpuid.rs +++ b/phd-tests/tests/src/cpuid.rs @@ -4,7 +4,9 @@ use cpuid_utils::{CpuidIdent, CpuidValues, CpuidVendor}; use phd_testcase::*; -use propolis_client::types::CpuidEntry; +use propolis_client::types::{ + CpuidEntry, InstanceSpecStatus, VersionedInstanceSpec, +}; use tracing::info; fn cpuid_entry( @@ -34,8 +36,11 @@ async fn cpuid_instance_spec_round_trip_test(ctx: &Framework) { vm.launch().await?; let spec_get_response = vm.get_spec().await?; - let propolis_client::types::VersionedInstanceSpec::V0(spec) = - spec_get_response.spec; + let InstanceSpecStatus::Present(VersionedInstanceSpec::V0(spec)) = + spec_get_response.spec + else { + panic!("instance spec should be present for a running VM"); + }; let cpuid = spec.board.cpuid.expect("board should have explicit CPUID"); assert_eq!(cpuid.entries.len(), entries.len()); diff --git a/phd-tests/tests/src/smoke.rs b/phd-tests/tests/src/smoke.rs index 592145b60..b3a14d3d4 100644 --- a/phd-tests/tests/src/smoke.rs +++ b/phd-tests/tests/src/smoke.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use phd_testcase::*; +use propolis_client::types::{InstanceSpecStatus, VersionedInstanceSpec}; #[phd_testcase] async fn nproc_test(ctx: &Framework) { @@ -46,8 +47,11 @@ async fn instance_spec_get_test(ctx: &Framework) { vm.launch().await?; let spec_get_response = vm.get_spec().await?; - let propolis_client::types::VersionedInstanceSpec::V0(spec) = - spec_get_response.spec; + let InstanceSpecStatus::Present(spec) = spec_get_response.spec else { + panic!("launched instance should have a spec"); + }; + + let VersionedInstanceSpec::V0(spec) = spec; assert_eq!(spec.board.cpus, 4); assert_eq!(spec.board.memory_mb, 3072); }