From 772e64fa9c8774dbc90a037ead09b59107cf8b56 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 30 Jan 2025 12:29:43 -0800 Subject: [PATCH 1/4] [nexus] Consider Affinity/Anti-Affinity Groups during instance placement --- nexus/db-queries/src/db/datastore/sled.rs | 1328 ++++++++++++++++- nexus/db-queries/src/db/queries/affinity.rs | 608 ++++++++ nexus/db-queries/src/db/queries/mod.rs | 1 + .../src/policy_test/resource_builder.rs | 2 + nexus/db-queries/src/policy_test/resources.rs | 17 + nexus/db-queries/tests/output/authz-roles.out | 84 ++ .../output/lookup_affinity_sleds_query.sql | 30 + .../lookup_anti_affinity_sleds_query.sql | 32 + 8 files changed, 2079 insertions(+), 23 deletions(-) create mode 100644 nexus/db-queries/src/db/queries/affinity.rs create mode 100644 nexus/db-queries/tests/output/lookup_affinity_sleds_query.sql create mode 100644 nexus/db-queries/tests/output/lookup_anti_affinity_sleds_query.sql diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index ee4e6c31cf..b402cade0f 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -13,6 +13,7 @@ use crate::db::datastore::ValidateTransition; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::model::to_db_sled_policy; +use crate::db::model::AffinityPolicy; use crate::db::model::Sled; use crate::db::model::SledResource; use crate::db::model::SledState; @@ -20,6 +21,8 @@ use crate::db::model::SledUpdate; use crate::db::pagination::paginated; use crate::db::pagination::Paginator; use crate::db::pool::DbConnection; +use crate::db::queries::affinity::lookup_affinity_sleds_query; +use crate::db::queries::affinity::lookup_anti_affinity_sleds_query; use crate::db::update_and_check::{UpdateAndCheck, UpdateStatus}; use crate::db::TransactionError; use crate::transaction_retry::OptionalError; @@ -43,11 +46,34 @@ use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; use omicron_uuid_kinds::PropolisUuid; use omicron_uuid_kinds::SledUuid; +use std::collections::HashSet; use std::fmt; use strum::IntoEnumIterator; use thiserror::Error; use uuid::Uuid; +#[derive(Debug, thiserror::Error)] +enum SledReservationError { + #[error("No reservation could be found")] + NotFound, + #[error("More than one sled was found with 'affinity = required'")] + TooManyAffinityConstraints, + #[error("A sled that is required is also considered banned by anti-affinity rules")] + ConflictingAntiAndAffinityConstraints, + #[error("A sled required by an affinity group is not a valid target for other reasons")] + RequiredAffinitySledNotValid, +} + +#[derive(Debug, thiserror::Error)] +enum SledReservationTransactionError { + #[error(transparent)] + Connection(#[from] Error), + #[error(transparent)] + Diesel(#[from] diesel::result::Error), + #[error(transparent)] + Reservation(#[from] SledReservationError), +} + impl DataStore { /// Stores a new sled in the database. 
 ///
@@ -192,13 +218,43 @@ impl DataStore {
         resources: db::model::Resources,
         constraints: db::model::SledReservationConstraints,
     ) -> CreateResult<db::model::SledResource> {
-        #[derive(Debug)]
-        enum SledReservationError {
-            NotFound,
-        }
+        self.sled_reservation_create_inner(
+            opctx,
+            instance_id,
+            propolis_id,
+            resources,
+            constraints,
+        )
+        .await
+        .map_err(|e| match e {
+            SledReservationTransactionError::Connection(e) => e,
+            SledReservationTransactionError::Diesel(e) => {
+                public_error_from_diesel(e, ErrorHandler::Server)
+            }
+            SledReservationTransactionError::Reservation(e) => match e {
+                SledReservationError::NotFound
+                | SledReservationError::TooManyAffinityConstraints
+                | SledReservationError::ConflictingAntiAndAffinityConstraints
+                | SledReservationError::RequiredAffinitySledNotValid => {
+                    return external::Error::insufficient_capacity(
+                        "No sleds can fit the requested instance",
+                        "No sled targets found that had enough \
+                        capacity to fit the requested instance.",
+                    );
+                }
+            },
+        })
+    }
+
+    async fn sled_reservation_create_inner(
+        &self,
+        opctx: &OpContext,
+        instance_id: InstanceUuid,
+        propolis_id: PropolisUuid,
+        resources: db::model::Resources,
+        constraints: db::model::SledReservationConstraints,
+    ) -> Result<db::model::SledResource, SledReservationTransactionError> {
         let err = OptionalError::new();
-
         let conn = self.pool_connection_authorized(opctx).await?;
 
         self.transaction_retry_wrapper("sled_reservation_create")
@@ -291,30 +347,197 @@ impl DataStore {
 
             define_sql_function!(fn random() -> diesel::sql_types::Float);
 
-                    // We only actually care about one target here, so this
-                    // query should have a `.limit(1)` attached. We fetch all
-                    // sled targets to leave additional debugging information in
-                    // the logs, for now.
+                    // Fetch all viable sled targets
                     let sled_targets = sled_targets
                         .order(random())
                        .get_results_async::<Uuid>(&conn)
                         .await?;
+
                     info!(
                         opctx.log,
-                        "found {} available sled targets", sled_targets.len();
+                        "found {} available sled targets before considering affinity", sled_targets.len();
                         "sled_ids" => ?sled_targets,
                     );
 
-                    if sled_targets.is_empty() {
-                        return Err(err.bail(SledReservationError::NotFound));
+                    let anti_affinity_sleds = lookup_anti_affinity_sleds_query(
+                        instance_id,
+                    ).get_results_async::<(AffinityPolicy, Uuid)>(&conn).await?;
+
+                    let affinity_sleds = lookup_affinity_sleds_query(
+                        instance_id,
+                    ).get_results_async::<(AffinityPolicy, Uuid)>(&conn).await?;
+
+                    // We use the following logic to calculate a desirable sled,
+                    // given a possible set of "targets", and the information
+                    // from affinity groups.
+                    //
+                    // # Rules vs Preferences
+                    //
+                    // Because "affinity policy" comes in two flavors, it's possible to bucket
+                    // affinity choices into two categories: "rules" and "preferences". "Rules"
+                    // are affinity dispositions for or against sled placement that must be
+                    // followed, and "preferences" are affinity dispositions that should be
+                    // followed for sled selection, in order of "most preferential" to "least
+                    // preferential".
+                    //
+                    // An example of a "rule" is "an anti-affinity group exists, containing a
+                    // target sled, with affinity_policy = 'fail'".
+                    //
+                    // An example of a "preference" is "an anti-affinity group exists,
+                    // containing a target sled, but the policy is 'allow'. We don't want to
+                    // use it as a target, but we will if there are no other choices."
+                    //
+                    // We apply rules before preferences to ensure they are always respected.
+                    // Furthermore, the evaluation of preferences is a target-seeking operation,
+                    // which identifies the distinct sets of targets, and searches them in
+                    // decreasing preference order.
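+                    //
+                    // As a concrete example (with hypothetical sled names): suppose
+                    // Targets = {s1, s2, s3}, an anti-affinity group with policy = 'fail'
+                    // already has a member on s1, and another anti-affinity group with
+                    // policy = 'allow' has a member on s2. The first group imposes a rule:
+                    // s1 is removed from consideration outright. The second imposes only a
+                    // preference: s3 is chosen over s2, but s2 is still usable if s3 cannot
+                    // fit the instance.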
+                    //
+                    // # Logic
+                    //
+                    // ## Background: Notation
+                    //
+                    // We use the following symbols for sets below:
+                    // - ∩: Intersection of two sets (A ∩ B is "everything that exists in A and
+                    //   also exists in B").
+                    // - \: Difference of two sets (A \ B is "everything that exists in A that
+                    //   does not exist in B").
+                    //
+                    // We also use the following notation for brevity:
+                    // - AA,P=Fail: All sleds sharing an anti-affinity instance within a group with
+                    //   policy = 'fail'.
+                    // - AA,P=Allow: Same as above, but with policy = 'allow'.
+                    // - A,P=Fail: All sleds sharing an affinity instance within a group with
+                    //   policy = 'fail'.
+                    // - A,P=Allow: Same as above, but with policy = 'allow'.
+                    //
+                    // ## Affinity: Apply Rules
+                    //
+                    // - Targets := All viable sleds for instance placement
+                    // - Banned := AA,P=Fail
+                    // - Required := A,P=Fail
+                    // - if Required.len() > 1: Fail (too many constraints).
+                    // - if Required.len() == 1...
+                    //   - ... if the entry exists in the "Banned" set: Fail
+                    //     (contradicting constraints 'Banned' + 'Required')
+                    //   - ... if the entry does not exist in "Targets": Fail
+                    //     ('Required' constraint not satisfiable)
+                    //   - ... if the entry does not exist in "Banned": Use it.
+                    //
+                    // If we have not yet picked a target, we can filter the
+                    // set of targets to ignore "banned" sleds, and then apply
+                    // preferences.
+                    //
+                    // - Targets := Targets \ Banned
+                    //
+                    // ## Affinity: Apply Preferences
+                    //
+                    // - Preferred := Targets ∩ A,P=Allow
+                    // - Unpreferred := Targets ∩ AA,P=Allow
+                    // - Neither := Preferred ∩ Unpreferred
+                    // - Preferred := Preferred \ Neither
+                    // - Unpreferred := Unpreferred \ Neither
+                    // - If Preferred isn't empty, pick a target from it.
+                    // - Targets := Targets \ Unpreferred
+                    // - If Targets isn't empty, pick a target from it.
+                    // - If Unpreferred isn't empty, pick a target from it.
+                    // - Fail, no targets are available.
+
+                    let mut targets: HashSet<_> = sled_targets.into_iter().collect();
+
+                    let banned = anti_affinity_sleds.iter().filter_map(|(policy, id)| {
+                        if *policy == AffinityPolicy::Fail {
+                            Some(*id)
+                        } else {
+                            None
+                        }
+                    }).collect::<HashSet<_>>();
+                    let required = affinity_sleds.iter().filter_map(|(policy, id)| {
+                        if *policy == AffinityPolicy::Fail {
+                            Some(*id)
+                        } else {
+                            None
+                        }
+                    }).collect::<HashSet<_>>();
+
+                    if !banned.is_empty() {
+                        info!(
+                            opctx.log,
+                            "affinity policy prohibits placement on {} sleds", banned.len();
+                            "banned" => ?banned,
+                        );
+                    }
+                    if !required.is_empty() {
+                        info!(
+                            opctx.log,
+                            "affinity policy requires placement on {} sleds", required.len();
+                            "required" => ?required,
+                        );
                     }
 
+                    let sled_target = if required.len() > 1 {
+                        return Err(err.bail(SledReservationError::TooManyAffinityConstraints));
+                    } else if let Some(required_id) = required.iter().next() {
+                        // If we have a "required" sled, it must be chosen.
+
+                        if banned.contains(&required_id) {
+                            return Err(err.bail(SledReservationError::ConflictingAntiAndAffinityConstraints));
+                        }
+                        if !targets.contains(&required_id) {
+                            return Err(err.bail(SledReservationError::RequiredAffinitySledNotValid));
+                        }
+                        *required_id
+                    } else {
+                        // We have no "required" sleds, but might have preferences.
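+                        //
+                        // For example (a hypothetical walkthrough of the preference
+                        // algebra above): if Targets = {s2, s3, s4}, Preferred = {s3, s4},
+                        // and Unpreferred = {s4}, then s4 is in both sets and cancels out,
+                        // leaving s3 as the pick.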
+
+                        targets = targets.difference(&banned).cloned().collect();
+
+                        let mut preferred = affinity_sleds.iter().filter_map(|(policy, id)| {
+                            if *policy == AffinityPolicy::Allow {
+                                Some(*id)
+                            } else {
+                                None
+                            }
+                        }).collect::<HashSet<_>>();
+                        let mut unpreferred = anti_affinity_sleds.iter().filter_map(|(policy, id)| {
+                            if *policy == AffinityPolicy::Allow {
+                                Some(*id)
+                            } else {
+                                None
+                            }
+                        }).collect::<HashSet<_>>();
+
+                        // Only consider "preferred" sleds that are viable targets
+                        preferred = targets.intersection(&preferred).cloned().collect();
+                        // Only consider "unpreferred" sleds that are viable targets
+                        unpreferred = targets.intersection(&unpreferred).cloned().collect();
+
+                        // If a target is both preferred and unpreferred, it is removed
+                        // from both sets.
+                        let both: HashSet<_> =
+                            preferred.intersection(&unpreferred).cloned().collect();
+                        preferred = preferred.difference(&both).cloned().collect();
+                        unpreferred = unpreferred.difference(&both).cloned().collect();
+
+                        if let Some(target) = preferred.iter().next() {
+                            *target
+                        } else {
+                            targets = targets.difference(&unpreferred).cloned().collect();
+                            if let Some(target) = targets.iter().next() {
+                                *target
+                            } else if let Some(target) = unpreferred.iter().next() {
+                                *target
+                            } else {
+                                return Err(err.bail(SledReservationError::NotFound));
+                            }
+                        }
+                    };
+
                     // Create a SledResource record, associate it with the target
                     // sled.
                     let resource = SledResource::new_for_vmm(
                         propolis_id,
                         instance_id,
-                        SledUuid::from_untyped_uuid(sled_targets[0]),
+                        SledUuid::from_untyped_uuid(sled_target),
                         resources,
                     );
 
@@ -328,17 +551,9 @@ impl DataStore {
             .await
             .map_err(|e| {
                 if let Some(err) = err.take() {
-                    match err {
-                        SledReservationError::NotFound => {
-                            return external::Error::insufficient_capacity(
-                                "No sleds can fit the requested instance",
-                                "No sled targets found that had enough \
-                                capacity to fit the requested instance.",
-                            );
-                        }
-                    }
+                    return SledReservationTransactionError::Reservation(err);
                 }
-                public_error_from_diesel(e, ErrorHandler::Server)
+                SledReservationTransactionError::Diesel(e)
             })
     }
 
@@ -849,7 +1064,10 @@ pub(in crate::db::datastore) mod test {
     };
     use crate::db::lookup::LookupPath;
     use crate::db::model::to_db_typed_uuid;
+    use crate::db::model::AffinityGroup;
+    use crate::db::model::AntiAffinityGroup;
     use crate::db::model::ByteCount;
+    use crate::db::model::Project;
     use crate::db::model::SqlU32;
     use crate::db::pub_test_utils::TestDatabase;
     use anyhow::{Context, Result};
@@ -859,13 +1077,16 @@ pub(in crate::db::datastore) mod test {
     use nexus_db_model::PhysicalDiskKind;
     use nexus_db_model::PhysicalDiskPolicy;
     use nexus_db_model::PhysicalDiskState;
+    use nexus_types::external_api::params;
     use nexus_types::identity::Asset;
+    use nexus_types::identity::Resource;
     use omicron_common::api::external;
     use omicron_test_utils::dev;
     use omicron_uuid_kinds::GenericUuid;
     use omicron_uuid_kinds::PhysicalDiskUuid;
     use omicron_uuid_kinds::SledUuid;
     use predicates::{prelude::*, BoxPredicate};
+    use std::collections::HashMap;
     use std::net::{Ipv6Addr, SocketAddrV6};
 
     fn rack_id() -> Uuid {
@@ -1159,6 +1380,1067 @@ pub(in crate::db::datastore) mod test {
         logctx.cleanup_successful();
     }
 
+    // Utilities to help with Affinity Testing
+
+    // Create a resource request that will probably fit on a sled.
+    fn small_resource_request() -> db::model::Resources {
+        db::model::Resources::new(
+            1,
+            // Just require a bare minimum, non-zero amount of RAM.
+ ByteCount::try_from(1024).unwrap(), + ByteCount::try_from(1024).unwrap(), + ) + } + + // Create a resource request that will entirely fill a sled. + fn large_resource_request() -> db::model::Resources { + let sled_resources = sled_system_hardware_for_test(); + let threads = sled_resources.usable_hardware_threads; + let rss_ram = sled_resources.usable_physical_ram; + let reservoir_ram = sled_resources.reservoir_size; + db::model::Resources::new(threads, rss_ram, reservoir_ram) + } + + async fn create_project( + opctx: &OpContext, + datastore: &DataStore, + ) -> (authz::Project, db::model::Project) { + let authz_silo = opctx.authn.silo_required().unwrap(); + + // Create a project + let project = Project::new( + authz_silo.id(), + params::ProjectCreate { + identity: external::IdentityMetadataCreateParams { + name: "project".parse().unwrap(), + description: "desc".to_string(), + }, + }, + ); + datastore.project_create(&opctx, project).await.unwrap() + } + + async fn create_anti_affinity_group( + opctx: &OpContext, + datastore: &DataStore, + authz_project: &authz::Project, + name: &'static str, + policy: external::AffinityPolicy, + ) -> AntiAffinityGroup { + datastore + .anti_affinity_group_create( + &opctx, + &authz_project, + AntiAffinityGroup::new( + authz_project.id(), + params::AntiAffinityGroupCreate { + identity: external::IdentityMetadataCreateParams { + name: name.parse().unwrap(), + description: "desc".to_string(), + }, + policy, + failure_domain: external::FailureDomain::Sled, + }, + ), + ) + .await + .unwrap() + } + + // This short-circuits some of the logic and checks we normally have when + // creating affinity groups, but makes testing easier. + async fn add_instance_to_anti_affinity_group( + datastore: &DataStore, + group_id: Uuid, + instance_id: Uuid, + ) { + use db::model::AntiAffinityGroupInstanceMembership; + use db::schema::anti_affinity_group_instance_membership::dsl as membership_dsl; + use omicron_uuid_kinds::AntiAffinityGroupUuid; + + diesel::insert_into( + membership_dsl::anti_affinity_group_instance_membership, + ) + .values(AntiAffinityGroupInstanceMembership::new( + AntiAffinityGroupUuid::from_untyped_uuid(group_id), + InstanceUuid::from_untyped_uuid(instance_id), + )) + .on_conflict((membership_dsl::group_id, membership_dsl::instance_id)) + .do_nothing() + .execute_async(&*datastore.pool_connection_for_tests().await.unwrap()) + .await + .unwrap(); + } + + async fn create_affinity_group( + opctx: &OpContext, + datastore: &DataStore, + authz_project: &authz::Project, + name: &'static str, + policy: external::AffinityPolicy, + ) -> AffinityGroup { + datastore + .affinity_group_create( + &opctx, + &authz_project, + AffinityGroup::new( + authz_project.id(), + params::AffinityGroupCreate { + identity: external::IdentityMetadataCreateParams { + name: name.parse().unwrap(), + description: "desc".to_string(), + }, + policy, + failure_domain: external::FailureDomain::Sled, + }, + ), + ) + .await + .unwrap() + } + + // This short-circuits some of the logic and checks we normally have when + // creating affinity groups, but makes testing easier. 
+    async fn add_instance_to_affinity_group(
+        datastore: &DataStore,
+        group_id: Uuid,
+        instance_id: Uuid,
+    ) {
+        use db::model::AffinityGroupInstanceMembership;
+        use db::schema::affinity_group_instance_membership::dsl as membership_dsl;
+        use omicron_uuid_kinds::AffinityGroupUuid;
+
+        diesel::insert_into(membership_dsl::affinity_group_instance_membership)
+            .values(AffinityGroupInstanceMembership::new(
+                AffinityGroupUuid::from_untyped_uuid(group_id),
+                InstanceUuid::from_untyped_uuid(instance_id),
+            ))
+            .on_conflict((
+                membership_dsl::group_id,
+                membership_dsl::instance_id,
+            ))
+            .do_nothing()
+            .execute_async(
+                &*datastore.pool_connection_for_tests().await.unwrap(),
+            )
+            .await
+            .unwrap();
+    }
+
+    async fn create_sleds(datastore: &DataStore, count: usize) -> Vec<Sled> {
+        let mut sleds = vec![];
+        for _ in 0..count {
+            let (sled, _) =
+                datastore.sled_upsert(test_new_sled_update()).await.unwrap();
+            sleds.push(sled);
+        }
+        sleds
+    }
+
+    type GroupName = &'static str;
+
+    #[derive(Copy, Clone, Debug)]
+    enum Affinity {
+        Positive,
+        Negative,
+    }
+
+    struct Group {
+        affinity: Affinity,
+        name: GroupName,
+        policy: external::AffinityPolicy,
+    }
+
+    impl Group {
+        async fn create(
+            &self,
+            opctx: &OpContext,
+            datastore: &DataStore,
+            authz_project: &authz::Project,
+        ) -> Uuid {
+            match self.affinity {
+                Affinity::Positive => create_affinity_group(
+                    &opctx,
+                    &datastore,
+                    &authz_project,
+                    self.name,
+                    self.policy,
+                )
+                .await
+                .id(),
+                Affinity::Negative => create_anti_affinity_group(
+                    &opctx,
+                    &datastore,
+                    &authz_project,
+                    self.name,
+                    self.policy,
+                )
+                .await
+                .id(),
+            }
+        }
+    }
+
+    struct AllGroups {
+        id_by_name: HashMap<&'static str, (Affinity, Uuid)>,
+    }
+
+    impl AllGroups {
+        async fn create(
+            opctx: &OpContext,
+            datastore: &DataStore,
+            authz_project: &authz::Project,
+            groups: &[Group],
+        ) -> AllGroups {
+            let mut id_by_name = HashMap::new();
+
+            for group in groups {
+                id_by_name.insert(
+                    group.name,
+                    (
+                        group.affinity,
+                        group.create(&opctx, &datastore, &authz_project).await,
+                    ),
+                );
+            }
+
+            Self { id_by_name }
+        }
+    }
+
+    struct Instance {
+        id: InstanceUuid,
+        groups: Vec<GroupName>,
+        force_onto_sled: Option<SledUuid>,
+        resources: db::model::Resources,
+    }
+
+    impl Instance {
+        fn new() -> Self {
+            Self {
+                id: InstanceUuid::new_v4(),
+                groups: vec![],
+                force_onto_sled: None,
+                resources: small_resource_request(),
+            }
+        }
+
+        fn use_many_resources(mut self) -> Self {
+            self.resources = large_resource_request();
+            self
+        }
+
+        // Adds this instance to a group. Can be called multiple times.
+        fn group(mut self, group: GroupName) -> Self {
+            self.groups.push(group);
+            self
+        }
+
+        // Force this instance to be placed on a specific sled
+        fn sled(mut self, sled: Uuid) -> Self {
+            self.force_onto_sled = Some(SledUuid::from_untyped_uuid(sled));
+            self
+        }
+
+        async fn add_to_groups_and_reserve(
+            &self,
+            opctx: &OpContext,
+            datastore: &DataStore,
+            all_groups: &AllGroups,
+        ) -> Result<db::model::SledResource, SledReservationTransactionError>
+        {
+            self.add_to_groups(&datastore, &all_groups).await;
+            create_instance_reservation(&datastore, &opctx, self).await
+        }
+
+        async fn add_to_groups(
+            &self,
+            datastore: &DataStore,
+            all_groups: &AllGroups,
+        ) {
+            for our_group_name in &self.groups {
+                let (affinity, group_id) = all_groups
+                    .id_by_name
+                    .get(our_group_name)
+                    .unwrap_or_else(|| {
+                        panic!("Group not found: {our_group_name}")
+                    });
+                match *affinity {
+                    Affinity::Positive => {
+                        add_instance_to_affinity_group(
+                            &datastore,
+                            *group_id,
+                            self.id.into_untyped_uuid(),
+                        )
+                        .await
+                    }
+                    Affinity::Negative => {
+                        add_instance_to_anti_affinity_group(
+                            &datastore,
+                            *group_id,
+                            self.id.into_untyped_uuid(),
+                        )
+                        .await
+                    }
+                }
+            }
+        }
+    }
+
+    async fn create_instance_reservation(
+        db: &DataStore,
+        opctx: &OpContext,
+        instance: &Instance,
+    ) -> Result<db::model::SledResource, SledReservationTransactionError> {
+        // Pick a specific sled, if requested
+        let constraints = db::model::SledReservationConstraintBuilder::new();
+        let constraints = if let Some(sled_target) = instance.force_onto_sled {
+            constraints.must_select_from(&[sled_target.into_untyped_uuid()])
+        } else {
+            constraints
+        };
+
+        // We're accessing the inner implementation to get a more detailed
+        // view of the error code.
+        let result = db
+            .sled_reservation_create_inner(
+                &opctx,
+                instance.id,
+                PropolisUuid::new_v4(),
+                instance.resources.clone(),
+                constraints.build(),
+            )
+            .await?;
+
+        if let Some(sled_target) = instance.force_onto_sled {
+            assert_eq!(SledUuid::from(result.sled_id), sled_target);
+        }
+
+        Ok(result)
+    }
+
+    // Anti-Affinity, Policy = Fail
+    // No sleds available => Insufficient Capacity Error
+    #[tokio::test]
+    async fn anti_affinity_policy_fail_no_capacity() {
+        let logctx =
+            dev::test_setup_log("anti_affinity_policy_fail_no_capacity");
+        let db = TestDatabase::new_with_datastore(&logctx.log).await;
+        let (opctx, datastore) = (db.opctx(), db.datastore());
+        let (authz_project, _project) =
+            create_project(&opctx, &datastore).await;
+
+        const SLED_COUNT: usize = 2;
+        let sleds = create_sleds(&datastore, SLED_COUNT).await;
+
+        let groups = [Group {
+            affinity: Affinity::Negative,
+            name: "anti-affinity",
+            policy: external::AffinityPolicy::Fail,
+        }];
+        let all_groups =
+            AllGroups::create(&opctx, &datastore, &authz_project, &groups)
+                .await;
+        let instances = [
+            Instance::new().group("anti-affinity").sled(sleds[0].id()),
+            Instance::new().group("anti-affinity").sled(sleds[1].id()),
+        ];
+        for instance in instances {
+            instance
+                .add_to_groups_and_reserve(&opctx, &datastore, &all_groups)
+                .await
+                .expect("Failed to set up instances");
+        }
+
+        let test_instance = Instance::new().group("anti-affinity");
+        let err = test_instance
+            .add_to_groups_and_reserve(&opctx, &datastore, &all_groups)
+            .await
+            .expect_err("Should have failed to place instance");
+
+        let SledReservationTransactionError::Reservation(
+            SledReservationError::NotFound,
+        ) = err
+        else {
+            panic!("Unexpected error: {err:?}");
+        };
+
+        db.terminate().await;
+        logctx.cleanup_successful();
+    }
+
+    // Anti-Affinity, Policy = Fail
+    // We should reliably pick a sled not occupied by another instance
+    #[tokio::test]
+    async fn
anti_affinity_policy_fail() { + let logctx = dev::test_setup_log("anti_affinity_policy_fail"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + let (authz_project, _project) = + create_project(&opctx, &datastore).await; + + const SLED_COUNT: usize = 3; + let sleds = create_sleds(&datastore, SLED_COUNT).await; + + let groups = [Group { + affinity: Affinity::Negative, + name: "anti-affinity", + policy: external::AffinityPolicy::Fail, + }]; + let all_groups = + AllGroups::create(&opctx, &datastore, &authz_project, &groups) + .await; + let instances = [ + Instance::new().group("anti-affinity").sled(sleds[0].id()), + Instance::new().group("anti-affinity").sled(sleds[1].id()), + ]; + for instance in instances { + instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Failed to set up instances"); + } + + let test_instance = Instance::new().group("anti-affinity"); + let resource = test_instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Should have succeeded allocation"); + assert_eq!(resource.sled_id.into_untyped_uuid(), sleds[2].id()); + + db.terminate().await; + logctx.cleanup_successful(); + } + + // Anti-Affinity, Policy = Allow + // + // Create two sleds, put instances on each belonging to an anti-affinity group. + // We can continue to add instances belonging to that anti-affinity group, because + // it has "AffinityPolicy::Allow". + #[tokio::test] + async fn anti_affinity_policy_allow() { + let logctx = dev::test_setup_log("anti_affinity_policy_allow"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + let (authz_project, _project) = + create_project(&opctx, &datastore).await; + + const SLED_COUNT: usize = 2; + let sleds = create_sleds(&datastore, SLED_COUNT).await; + + let groups = [Group { + affinity: Affinity::Negative, + name: "anti-affinity", + policy: external::AffinityPolicy::Allow, + }]; + let all_groups = + AllGroups::create(&opctx, &datastore, &authz_project, &groups) + .await; + let instances = [ + Instance::new().group("anti-affinity").sled(sleds[0].id()), + Instance::new().group("anti-affinity").sled(sleds[1].id()), + ]; + for instance in instances { + instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Failed to set up instances"); + } + + let test_instance = Instance::new().group("anti-affinity"); + let resource = test_instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Should have succeeded allocation"); + assert!( + [sleds[0].id(), sleds[1].id()] + .contains(resource.sled_id.as_untyped_uuid()), + "Should have been provisioned to one of the two viable sleds" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + // Affinity, Policy = Fail + // + // Placement of instances with positive affinity will share a sled. 
+ #[tokio::test] + async fn affinity_policy_fail() { + let logctx = dev::test_setup_log("affinity_policy_fail"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + let (authz_project, _project) = + create_project(&opctx, &datastore).await; + + const SLED_COUNT: usize = 2; + let sleds = create_sleds(&datastore, SLED_COUNT).await; + + let groups = [Group { + affinity: Affinity::Positive, + name: "affinity", + policy: external::AffinityPolicy::Fail, + }]; + let all_groups = + AllGroups::create(&opctx, &datastore, &authz_project, &groups) + .await; + let instances = [Instance::new().group("affinity").sled(sleds[0].id())]; + for instance in instances { + instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Failed to set up instances"); + } + + let test_instance = Instance::new().group("affinity"); + let resource = test_instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Should have placed instance"); + assert_eq!(resource.sled_id.into_untyped_uuid(), sleds[0].id()); + + let another_test_instance = Instance::new().group("affinity"); + let resource = another_test_instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Should have placed instance (again)"); + assert_eq!(resource.sled_id.into_untyped_uuid(), sleds[0].id()); + + db.terminate().await; + logctx.cleanup_successful(); + } + + // Affinity, Policy = Fail, with too many constraints. + #[tokio::test] + async fn affinity_policy_fail_too_many_constraints() { + let logctx = + dev::test_setup_log("affinity_policy_fail_too_many_constraints"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + let (authz_project, _project) = + create_project(&opctx, &datastore).await; + + const SLED_COUNT: usize = 3; + let sleds = create_sleds(&datastore, SLED_COUNT).await; + + let groups = [ + Group { + affinity: Affinity::Positive, + name: "affinity1", + policy: external::AffinityPolicy::Fail, + }, + Group { + affinity: Affinity::Positive, + name: "affinity2", + policy: external::AffinityPolicy::Fail, + }, + ]; + let all_groups = + AllGroups::create(&opctx, &datastore, &authz_project, &groups) + .await; + + // We are constrained so that an instance belonging to both groups must be + // placed on both sled 0 and sled 1. This cannot be satisfied, and it returns + // an error. + let instances = [ + Instance::new().group("affinity1").sled(sleds[0].id()), + Instance::new().group("affinity2").sled(sleds[1].id()), + ]; + for instance in instances { + instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Failed to set up instances"); + } + + let test_instance = + Instance::new().group("affinity1").group("affinity2"); + let err = test_instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect_err("Should have failed to place instance"); + + let SledReservationTransactionError::Reservation( + SledReservationError::TooManyAffinityConstraints, + ) = err + else { + panic!("Unexpected error: {err:?}"); + }; + + db.terminate().await; + logctx.cleanup_successful(); + } + + // Affinity, Policy = Fail forces "no space" early + // + // Placement of instances with positive affinity with "policy = Fail" + // will always be forced to share a sled, even if there are other options. 
+ #[tokio::test] + async fn affinity_policy_fail_no_capacity() { + let logctx = dev::test_setup_log("affinity_policy_fail_no_capacity"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + let (authz_project, _project) = + create_project(&opctx, &datastore).await; + + const SLED_COUNT: usize = 2; + let sleds = create_sleds(&datastore, SLED_COUNT).await; + + let groups = [Group { + affinity: Affinity::Positive, + name: "affinity", + policy: external::AffinityPolicy::Fail, + }]; + let all_groups = + AllGroups::create(&opctx, &datastore, &authz_project, &groups) + .await; + let instances = [Instance::new() + .use_many_resources() + .group("affinity") + .sled(sleds[0].id())]; + for instance in instances { + instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Failed to set up instances"); + } + + let test_instance = + Instance::new().use_many_resources().group("affinity"); + let err = test_instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect_err("Should have failed to place instance"); + let SledReservationTransactionError::Reservation( + SledReservationError::RequiredAffinitySledNotValid, + ) = err + else { + panic!("Unexpected error: {err:?}"); + }; + + db.terminate().await; + logctx.cleanup_successful(); + } + + // Affinity, Policy = Allow lets us use other sleds + // + // This is similar to "affinity_policy_fail_no_capacity", but by + // using "Policy = Allow" instead of "Policy = Fail", we are able to pick + // a different sled for the reservation. + #[tokio::test] + async fn affinity_policy_allow_picks_different_sled() { + let logctx = + dev::test_setup_log("affinity_policy_allow_picks_different_sled"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + let (authz_project, _project) = + create_project(&opctx, &datastore).await; + + const SLED_COUNT: usize = 2; + let sleds = create_sleds(&datastore, SLED_COUNT).await; + + let groups = [Group { + affinity: Affinity::Positive, + name: "affinity", + policy: external::AffinityPolicy::Allow, + }]; + let all_groups = + AllGroups::create(&opctx, &datastore, &authz_project, &groups) + .await; + let instances = [Instance::new() + .use_many_resources() + .group("affinity") + .sled(sleds[0].id())]; + for instance in instances { + instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Failed to set up instances"); + } + + let test_instance = + Instance::new().use_many_resources().group("affinity"); + let reservation = test_instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Should have made reservation"); + assert_eq!(reservation.sled_id.into_untyped_uuid(), sleds[1].id()); + + db.terminate().await; + logctx.cleanup_successful(); + } + + // Anti-Affinity, Policy = Fail + Affinity, Policy = Fail + // + // These constraints are contradictory - we're asking the allocator + // to colocate and NOT colocate our new instance with existing ones, which + // should not be satisfiable. 
+ #[tokio::test] + async fn affinity_and_anti_affinity_policy_fail() { + let logctx = + dev::test_setup_log("affinity_and_anti_affinity_policy_fail"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + let (authz_project, _project) = + create_project(&opctx, &datastore).await; + + const SLED_COUNT: usize = 2; + let sleds = create_sleds(&datastore, SLED_COUNT).await; + + let groups = [ + Group { + affinity: Affinity::Negative, + name: "anti-affinity", + policy: external::AffinityPolicy::Fail, + }, + Group { + affinity: Affinity::Positive, + name: "affinity", + policy: external::AffinityPolicy::Fail, + }, + ]; + let all_groups = + AllGroups::create(&opctx, &datastore, &authz_project, &groups) + .await; + let instances = [Instance::new() + .group("anti-affinity") + .group("affinity") + .sled(sleds[0].id())]; + for instance in instances { + instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Failed to set up instances"); + } + + let test_instance = + Instance::new().group("anti-affinity").group("affinity"); + let err = test_instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect_err("Contradictory constraints should not be satisfiable"); + let SledReservationTransactionError::Reservation( + SledReservationError::ConflictingAntiAndAffinityConstraints, + ) = err + else { + panic!("Unexpected error: {err:?}"); + }; + + db.terminate().await; + logctx.cleanup_successful(); + } + + // Anti-Affinity, Policy = Allow + Affinity, Policy = Allow + // + // These constraints are contradictory, but since they encode + // "preferences" rather than "rules", they cancel out. + #[tokio::test] + async fn affinity_and_anti_affinity_policy_allow() { + let logctx = + dev::test_setup_log("affinity_and_anti_affinity_policy_allow"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + let (authz_project, _project) = + create_project(&opctx, &datastore).await; + + const SLED_COUNT: usize = 2; + let sleds = create_sleds(&datastore, SLED_COUNT).await; + + let groups = [ + Group { + affinity: Affinity::Negative, + name: "anti-affinity", + policy: external::AffinityPolicy::Allow, + }, + Group { + affinity: Affinity::Positive, + name: "affinity", + policy: external::AffinityPolicy::Allow, + }, + ]; + let all_groups = + AllGroups::create(&opctx, &datastore, &authz_project, &groups) + .await; + let instances = [ + Instance::new() + .group("anti-affinity") + .group("affinity") + .sled(sleds[0].id()), + Instance::new() + .group("anti-affinity") + .group("affinity") + .sled(sleds[1].id()), + ]; + for instance in instances { + instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Failed to set up instances"); + } + + let test_instance = + Instance::new().group("anti-affinity").group("affinity"); + let resource = test_instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Should have succeeded allocation"); + assert!( + [sleds[0].id(), sleds[1].id()] + .contains(resource.sled_id.as_untyped_uuid()), + "Should have been provisioned to one of the two viable sleds" + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn anti_affinity_multi_group() { + let logctx = dev::test_setup_log("anti_affinity_multi_group"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = 
(db.opctx(), db.datastore()); + let (authz_project, _project) = + create_project(&opctx, &datastore).await; + + const SLED_COUNT: usize = 4; + let sleds = create_sleds(&datastore, SLED_COUNT).await; + + let groups = [ + Group { + affinity: Affinity::Negative, + name: "strict-anti-affinity", + policy: external::AffinityPolicy::Fail, + }, + Group { + affinity: Affinity::Negative, + name: "anti-affinity", + policy: external::AffinityPolicy::Allow, + }, + Group { + affinity: Affinity::Positive, + name: "affinity", + policy: external::AffinityPolicy::Allow, + }, + ]; + let all_groups = + AllGroups::create(&opctx, &datastore, &authz_project, &groups) + .await; + + // Sleds 0 and 1 contain the "strict-anti-affinity" group instances, + // and won't be used. + // + // Sled 3 has an "anti-affinity" group instance, and also won't be used. + // + // This only leaves sled 2. + let instances = [ + Instance::new() + .group("strict-anti-affinity") + .group("affinity") + .sled(sleds[0].id()), + Instance::new().group("strict-anti-affinity").sled(sleds[1].id()), + Instance::new().group("anti-affinity").sled(sleds[3].id()), + ]; + for instance in instances { + instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Failed to set up instances"); + } + + let test_instance = Instance::new() + .group("strict-anti-affinity") + .group("anti-affinity") + .group("affinity"); + let resource = test_instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Should have succeeded allocation"); + + assert_eq!(resource.sled_id.into_untyped_uuid(), sleds[2].id()); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn affinity_multi_group() { + let logctx = dev::test_setup_log("affinity_multi_group"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + let (authz_project, _project) = + create_project(&opctx, &datastore).await; + + const SLED_COUNT: usize = 4; + let sleds = create_sleds(&datastore, SLED_COUNT).await; + + let groups = [ + Group { + affinity: Affinity::Positive, + name: "affinity", + policy: external::AffinityPolicy::Allow, + }, + Group { + affinity: Affinity::Negative, + name: "anti-affinity", + policy: external::AffinityPolicy::Allow, + }, + ]; + let all_groups = + AllGroups::create(&opctx, &datastore, &authz_project, &groups) + .await; + + // Sled 0 contains an affinity group, but it's large enough to make it + // non-viable for future allocations. + // + // Sled 1 contains an affinity and anti-affinity group, so they cancel out. + // This gives it "no priority". + // + // Sled 2 contains nothing. It's a viable target, neither preferred nor + // unpreferred. + // + // Sled 3 contains an affinity group, which is prioritized. 
+ let instances = [ + Instance::new() + .group("affinity") + .use_many_resources() + .sled(sleds[0].id()), + Instance::new() + .group("affinity") + .group("anti-affinity") + .sled(sleds[1].id()), + Instance::new().group("affinity").sled(sleds[3].id()), + ]; + for instance in instances { + instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Failed to set up instances"); + } + + let test_instance = + Instance::new().group("affinity").group("anti-affinity"); + let resource = test_instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Should have succeeded allocation"); + + assert_eq!(resource.sled_id.into_untyped_uuid(), sleds[3].id()); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn affinity_ignored_from_other_groups() { + let logctx = dev::test_setup_log("affinity_ignored_from_other_groups"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + let (authz_project, _project) = + create_project(&opctx, &datastore).await; + + const SLED_COUNT: usize = 3; + let sleds = create_sleds(&datastore, SLED_COUNT).await; + + let groups = [ + Group { + affinity: Affinity::Positive, + name: "affinity1", + policy: external::AffinityPolicy::Fail, + }, + Group { + affinity: Affinity::Positive, + name: "affinity2", + policy: external::AffinityPolicy::Fail, + }, + ]; + let all_groups = + AllGroups::create(&opctx, &datastore, &authz_project, &groups) + .await; + + // Only "sleds[1]" has space. We ignore the affinity policy because + // our new instance won't belong to either group. + let instances = [ + Instance::new() + .group("affinity1") + .use_many_resources() + .sled(sleds[0].id()), + Instance::new() + .group("affinity2") + .use_many_resources() + .sled(sleds[2].id()), + ]; + for instance in instances { + instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Failed to set up instances"); + } + + let test_instance = Instance::new(); + let resource = test_instance + .add_to_groups_and_reserve(&opctx, &datastore, &all_groups) + .await + .expect("Should have succeeded allocation"); + + assert_eq!(resource.sled_id.into_untyped_uuid(), sleds[1].id()); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn anti_affinity_ignored_from_other_groups() { + let logctx = + dev::test_setup_log("anti_affinity_ignored_from_other_groups"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + let (authz_project, _project) = + create_project(&opctx, &datastore).await; + + const SLED_COUNT: usize = 3; + let sleds = create_sleds(&datastore, SLED_COUNT).await; + + let groups = [ + Group { + affinity: Affinity::Negative, + name: "anti-affinity1", + policy: external::AffinityPolicy::Fail, + }, + Group { + affinity: Affinity::Negative, + name: "anti-affinity2", + policy: external::AffinityPolicy::Fail, + }, + ]; + let all_groups = + AllGroups::create(&opctx, &datastore, &authz_project, &groups) + .await; + + // Only "sleds[2]" has space, even though it also contains an anti-affinity group. + // However, if we don't belong to this group, we won't care. 
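+        // (The test instance joins "anti-affinity1" only, so the
+        // "anti-affinity2" membership on sleds[2] neither bans nor
+        // unprefers it.)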
+
+        let instances = [
+            Instance::new().group("anti-affinity1").sled(sleds[0].id()),
+            Instance::new().use_many_resources().sled(sleds[1].id()),
+            Instance::new().group("anti-affinity2").sled(sleds[2].id()),
+        ];
+        for instance in instances {
+            instance
+                .add_to_groups_and_reserve(&opctx, &datastore, &all_groups)
+                .await
+                .expect("Failed to set up instances");
+        }
+
+        let test_instance = Instance::new().group("anti-affinity1");
+        let resource = test_instance
+            .add_to_groups_and_reserve(&opctx, &datastore, &all_groups)
+            .await
+            .expect("Should have succeeded allocation");
+
+        assert_eq!(resource.sled_id.into_untyped_uuid(), sleds[2].id());
+
+        db.terminate().await;
+        logctx.cleanup_successful();
+    }
+
     async fn lookup_physical_disk(
         datastore: &DataStore,
         id: PhysicalDiskUuid,
diff --git a/nexus/db-queries/src/db/queries/affinity.rs b/nexus/db-queries/src/db/queries/affinity.rs
new file mode 100644
index 0000000000..6da857669d
--- /dev/null
+++ b/nexus/db-queries/src/db/queries/affinity.rs
@@ -0,0 +1,608 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Implementation of queries for affinity groups
+
+use crate::db::model::AffinityPolicyEnum;
+use crate::db::raw_query_builder::{QueryBuilder, TypedSqlQuery};
+use diesel::sql_types;
+use omicron_uuid_kinds::GenericUuid;
+use omicron_uuid_kinds::InstanceUuid;
+
+/// For an instance, look up all anti-affinity groups, and return
+/// a list of sleds with other instances in that anti-affinity group
+/// already reserved.
+pub fn lookup_anti_affinity_sleds_query(
+    instance_id: InstanceUuid,
+) -> TypedSqlQuery<(AffinityPolicyEnum, sql_types::Uuid)> {
+    QueryBuilder::new().sql(
+        "WITH our_groups AS (
+            SELECT group_id
+            FROM anti_affinity_group_instance_membership
+            WHERE instance_id = ").param().sql("
+        ),
+        other_instances AS (
+            SELECT anti_affinity_group_instance_membership.group_id,instance_id
+            FROM anti_affinity_group_instance_membership
+            JOIN our_groups
+            ON anti_affinity_group_instance_membership.group_id = our_groups.group_id
+            WHERE instance_id != ").param().sql("
+        ),
+        other_instances_by_policy AS (
+            SELECT policy,instance_id
+            FROM other_instances
+            JOIN anti_affinity_group
+            ON
+                anti_affinity_group.id = other_instances.group_id AND
+                anti_affinity_group.failure_domain = 'sled'
+            WHERE anti_affinity_group.time_deleted IS NULL
+        )
+        SELECT DISTINCT policy,sled_id
+        FROM other_instances_by_policy
+        JOIN sled_resource
+        ON
+            sled_resource.instance_id = other_instances_by_policy.instance_id AND
+            sled_resource.kind = 'instance'")
+    .bind::<sql_types::Uuid, _>(instance_id.into_untyped_uuid())
+    .bind::<sql_types::Uuid, _>(instance_id.into_untyped_uuid())
+    .query()
+}
+
+/// For an instance, look up all affinity groups, and return a list of sleds
+/// with other instances in that affinity group already reserved.
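+///
+/// Like `lookup_anti_affinity_sleds_query` above, each returned row pairs a
+/// group's policy with a sled ID, so the caller can separate hard rules
+/// (policy = 'fail') from soft preferences (policy = 'allow').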
+pub fn lookup_affinity_sleds_query(
+    instance_id: InstanceUuid,
+) -> TypedSqlQuery<(AffinityPolicyEnum, sql_types::Uuid)> {
+    QueryBuilder::new()
+        .sql(
+            "WITH our_groups AS (
+            SELECT group_id
+            FROM affinity_group_instance_membership
+            WHERE instance_id = ",
+        )
+        .param()
+        .sql(
+            "
+        ),
+        other_instances AS (
+            SELECT affinity_group_instance_membership.group_id,instance_id
+            FROM affinity_group_instance_membership
+            JOIN our_groups
+            ON affinity_group_instance_membership.group_id = our_groups.group_id
+            WHERE instance_id != ",
+        )
+        .param()
+        .sql(
+            "
+        ),
+        other_instances_by_policy AS (
+            SELECT policy,instance_id
+            FROM other_instances
+            JOIN affinity_group
+            ON
+                affinity_group.id = other_instances.group_id AND
+                affinity_group.failure_domain = 'sled'
+            WHERE affinity_group.time_deleted IS NULL
+        )
+        SELECT DISTINCT policy,sled_id
+        FROM other_instances_by_policy
+        JOIN sled_resource
+        ON
+            sled_resource.instance_id = other_instances_by_policy.instance_id AND
+            sled_resource.kind = 'instance'",
+        )
+        .bind::<sql_types::Uuid, _>(instance_id.into_untyped_uuid())
+        .bind::<sql_types::Uuid, _>(instance_id.into_untyped_uuid())
+        .query()
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::db::explain::ExplainableAsync;
+    use crate::db::model;
+    use crate::db::pub_test_utils::TestDatabase;
+    use crate::db::raw_query_builder::expectorate_query_contents;
+
+    use anyhow::Context;
+    use async_bb8_diesel::AsyncRunQueryDsl;
+    use nexus_types::external_api::params;
+    use nexus_types::identity::Resource;
+    use omicron_common::api::external;
+    use omicron_test_utils::dev;
+    use omicron_uuid_kinds::AffinityGroupUuid;
+    use omicron_uuid_kinds::AntiAffinityGroupUuid;
+    use omicron_uuid_kinds::PropolisUuid;
+    use omicron_uuid_kinds::SledUuid;
+    use uuid::Uuid;
+
+    #[tokio::test]
+    async fn expectorate_lookup_anti_affinity_sleds_query() {
+        let id = InstanceUuid::nil();
+
+        let query = lookup_anti_affinity_sleds_query(id);
+        expectorate_query_contents(
+            &query,
+            "tests/output/lookup_anti_affinity_sleds_query.sql",
+        )
+        .await;
+    }
+
+    #[tokio::test]
+    async fn expectorate_lookup_affinity_sleds_query() {
+        let id = InstanceUuid::nil();
+
+        let query = lookup_affinity_sleds_query(id);
+        expectorate_query_contents(
+            &query,
+            "tests/output/lookup_affinity_sleds_query.sql",
+        )
+        .await;
+    }
+
+    #[tokio::test]
+    async fn explain_lookup_anti_affinity_sleds_query() {
+        let logctx =
+            dev::test_setup_log("explain_lookup_anti_affinity_sleds_query");
+        let db = TestDatabase::new_with_pool(&logctx.log).await;
+        let pool = db.pool();
+        let conn = pool.claim().await.unwrap();
+
+        let id = InstanceUuid::nil();
+        let query = lookup_anti_affinity_sleds_query(id);
+        let _ = query
+            .explain_async(&conn)
+            .await
+            .expect("Failed to explain query - is it valid SQL?");
+
+        db.terminate().await;
+        logctx.cleanup_successful();
+    }
+
+    #[tokio::test]
+    async fn explain_lookup_affinity_sleds_query() {
+        let logctx = dev::test_setup_log("explain_lookup_affinity_sleds_query");
+        let db = TestDatabase::new_with_pool(&logctx.log).await;
+        let pool = db.pool();
+        let conn = pool.claim().await.unwrap();
+
+        let id = InstanceUuid::nil();
+        let query = lookup_affinity_sleds_query(id);
+        let _ = query
+            .explain_async(&conn)
+            .await
+            .expect("Failed to explain query - is it valid SQL?");
+
+        db.terminate().await;
+        logctx.cleanup_successful();
+    }
+
+    async fn make_affinity_group(
+        project_id: Uuid,
+        name: &'static str,
+        policy: external::AffinityPolicy,
+        conn: &async_bb8_diesel::Connection<crate::db::pool::DbConnection>,
+    ) -> anyhow::Result<model::AffinityGroup> {
+        let group =
+            model::AffinityGroup::new(
+                project_id,
+                params::AffinityGroupCreate {
+                    identity: external::IdentityMetadataCreateParams {
+                        name: name.parse().unwrap(),
+                        description: "desc".to_string(),
+                    },
+                    policy,
+                    failure_domain: external::FailureDomain::Sled,
+                },
+            );
+        use crate::db::schema::affinity_group::dsl;
+        diesel::insert_into(dsl::affinity_group)
+            .values(group.clone())
+            .execute_async(conn)
+            .await
+            .context("Cannot create affinity group")?;
+        Ok(group)
+    }
+
+    async fn make_anti_affinity_group(
+        project_id: Uuid,
+        name: &'static str,
+        policy: external::AffinityPolicy,
+        conn: &async_bb8_diesel::Connection<crate::db::pool::DbConnection>,
+    ) -> anyhow::Result<model::AntiAffinityGroup> {
+        let group = model::AntiAffinityGroup::new(
+            project_id,
+            params::AntiAffinityGroupCreate {
+                identity: external::IdentityMetadataCreateParams {
+                    name: name.parse().unwrap(),
+                    description: "desc".to_string(),
+                },
+                policy,
+                failure_domain: external::FailureDomain::Sled,
+            },
+        );
+        use crate::db::schema::anti_affinity_group::dsl;
+        diesel::insert_into(dsl::anti_affinity_group)
+            .values(group.clone())
+            .execute_async(conn)
+            .await
+            .context("Cannot create anti affinity group")?;
+        Ok(group)
+    }
+
+    async fn make_affinity_group_instance_membership(
+        group_id: AffinityGroupUuid,
+        instance_id: InstanceUuid,
+        conn: &async_bb8_diesel::Connection<crate::db::pool::DbConnection>,
+    ) -> anyhow::Result<()> {
+        // Let's claim an instance belongs to that group
+        let membership =
+            model::AffinityGroupInstanceMembership::new(group_id, instance_id);
+        use crate::db::schema::affinity_group_instance_membership::dsl;
+        diesel::insert_into(dsl::affinity_group_instance_membership)
+            .values(membership)
+            .execute_async(conn)
+            .await
+            .context("Cannot create affinity group instance membership")?;
+        Ok(())
+    }
+
+    async fn make_anti_affinity_group_instance_membership(
+        group_id: AntiAffinityGroupUuid,
+        instance_id: InstanceUuid,
+        conn: &async_bb8_diesel::Connection<crate::db::pool::DbConnection>,
+    ) -> anyhow::Result<()> {
+        // Let's claim an instance belongs to that group
+        let membership = model::AntiAffinityGroupInstanceMembership::new(
+            group_id,
+            instance_id,
+        );
+        use crate::db::schema::anti_affinity_group_instance_membership::dsl;
+        diesel::insert_into(dsl::anti_affinity_group_instance_membership)
+            .values(membership)
+            .execute_async(conn)
+            .await
+            .context("Cannot create anti affinity group instance membership")?;
+        Ok(())
+    }
+
+    async fn make_instance_sled_resource(
+        sled_id: SledUuid,
+        instance_id: InstanceUuid,
+        conn: &async_bb8_diesel::Connection<crate::db::pool::DbConnection>,
+    ) -> anyhow::Result<()> {
+        let resource = model::SledResource::new_for_vmm(
+            PropolisUuid::new_v4(),
+            instance_id,
+            sled_id,
+            model::Resources::new(
+                0,
+                model::ByteCount::from(
+                    external::ByteCount::from_gibibytes_u32(0),
+                ),
+                model::ByteCount::from(
+                    external::ByteCount::from_gibibytes_u32(0),
+                ),
+            ),
+        );
+        use crate::db::schema::sled_resource::dsl;
+        diesel::insert_into(dsl::sled_resource)
+            .values(resource)
+            .execute_async(conn)
+            .await
+            .context("Cannot create sled resource")?;
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn lookup_affinity_sleds() {
+        let logctx = dev::test_setup_log("lookup_affinity_sleds");
+        let db = TestDatabase::new_with_pool(&logctx.log).await;
+        let pool = db.pool();
+        let conn = pool.claim().await.unwrap();
+
+        let our_instance_id = InstanceUuid::new_v4();
+        let other_instance_id = InstanceUuid::new_v4();
+
+        // With no groups and no instances, we should see no other instances
+        // belonging to our affinity group.
+ assert_eq!( + lookup_affinity_sleds_query(our_instance_id,) + .get_results_async::<(model::AffinityPolicy, Uuid)>(&*conn) + .await + .unwrap(), + vec![], + ); + + let sled_id = SledUuid::new_v4(); + let project_id = Uuid::new_v4(); + + // Make a group, add our instance to it + let group = make_affinity_group( + project_id, + "group-allow", + external::AffinityPolicy::Allow, + &conn, + ) + .await + .unwrap(); + make_affinity_group_instance_membership( + AffinityGroupUuid::from_untyped_uuid(group.id()), + our_instance_id, + &conn, + ) + .await + .unwrap(); + + // Create an instance which belongs to that group + make_affinity_group_instance_membership( + AffinityGroupUuid::from_untyped_uuid(group.id()), + other_instance_id, + &conn, + ) + .await + .unwrap(); + + make_instance_sled_resource(sled_id, other_instance_id, &conn) + .await + .unwrap(); + + // Now if we look, we'll find the "other sled", on which the "other + // instance" was placed. + assert_eq!( + lookup_affinity_sleds_query(our_instance_id,) + .get_results_async::<(model::AffinityPolicy, Uuid)>(&*conn) + .await + .unwrap(), + vec![(model::AffinityPolicy::Allow, sled_id.into_untyped_uuid())], + ); + + // If we look from the perspective of the other instance, + // we "ignore ourselves" for placement, so the set of sleds to consider + // is still empty. + assert_eq!( + lookup_affinity_sleds_query(other_instance_id,) + .get_results_async::<(model::AffinityPolicy, Uuid)>(&*conn) + .await + .unwrap(), + vec![] + ); + + // If we make another group (note the policy is different this time!) + // it'll also be reflected in the results. + let group = make_affinity_group( + project_id, + "group-fail", + external::AffinityPolicy::Fail, + &conn, + ) + .await + .unwrap(); + make_affinity_group_instance_membership( + AffinityGroupUuid::from_untyped_uuid(group.id()), + our_instance_id, + &conn, + ) + .await + .unwrap(); + make_affinity_group_instance_membership( + AffinityGroupUuid::from_untyped_uuid(group.id()), + other_instance_id, + &conn, + ) + .await + .unwrap(); + + // We see the outcome of both groups, with a different policy for each. + let mut results = lookup_affinity_sleds_query(our_instance_id) + .get_results_async::<(model::AffinityPolicy, Uuid)>(&*conn) + .await + .unwrap(); + results.sort(); + assert_eq!( + results, + vec![ + (model::AffinityPolicy::Fail, sled_id.into_untyped_uuid()), + (model::AffinityPolicy::Allow, sled_id.into_untyped_uuid()), + ], + ); + + // Let's add one more group, to see what happens to the query output. + let group = make_affinity_group( + project_id, + "group-fail2", + external::AffinityPolicy::Fail, + &conn, + ) + .await + .unwrap(); + make_affinity_group_instance_membership( + AffinityGroupUuid::from_untyped_uuid(group.id()), + our_instance_id, + &conn, + ) + .await + .unwrap(); + make_affinity_group_instance_membership( + AffinityGroupUuid::from_untyped_uuid(group.id()), + other_instance_id, + &conn, + ) + .await + .unwrap(); + let mut results = lookup_affinity_sleds_query(our_instance_id) + .get_results_async::<(model::AffinityPolicy, Uuid)>(&*conn) + .await + .unwrap(); + results.sort(); + + // Since we use "SELECT DISTINCT", the results are bounded, and do not + // grow as we keep on finding more sleds that have the same policy. 
+ assert_eq!( + results, + vec![ + (model::AffinityPolicy::Fail, sled_id.into_untyped_uuid()), + (model::AffinityPolicy::Allow, sled_id.into_untyped_uuid()), + ], + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn lookup_anti_affinity_sleds() { + let logctx = dev::test_setup_log("lookup_anti_affinity_sleds"); + let db = TestDatabase::new_with_pool(&logctx.log).await; + let pool = db.pool(); + let conn = pool.claim().await.unwrap(); + + let our_instance_id = InstanceUuid::new_v4(); + let other_instance_id = InstanceUuid::new_v4(); + + // With no groups and no instances, we should see no other instances + // belonging to our anti-affinity group. + assert_eq!( + lookup_anti_affinity_sleds_query(our_instance_id,) + .get_results_async::<(model::AffinityPolicy, Uuid)>(&*conn) + .await + .unwrap(), + vec![], + ); + + let sled_id = SledUuid::new_v4(); + let project_id = Uuid::new_v4(); + + // Make a group, add our instance to it + let group = make_anti_affinity_group( + project_id, + "group-allow", + external::AffinityPolicy::Allow, + &conn, + ) + .await + .unwrap(); + make_anti_affinity_group_instance_membership( + AntiAffinityGroupUuid::from_untyped_uuid(group.id()), + our_instance_id, + &conn, + ) + .await + .unwrap(); + + // Create an instance which belongs to that group + make_anti_affinity_group_instance_membership( + AntiAffinityGroupUuid::from_untyped_uuid(group.id()), + other_instance_id, + &conn, + ) + .await + .unwrap(); + + make_instance_sled_resource(sled_id, other_instance_id, &conn) + .await + .unwrap(); + + // Now if we look, we'll find the "other sled", on which the "other + // instance" was placed. + assert_eq!( + lookup_anti_affinity_sleds_query(our_instance_id,) + .get_results_async::<(model::AffinityPolicy, Uuid)>(&*conn) + .await + .unwrap(), + vec![(model::AffinityPolicy::Allow, sled_id.into_untyped_uuid())], + ); + + // If we look from the perspective of the other instance, + // we "ignore ourselves" for placement, so the set of sleds to consider + // is still empty. + assert_eq!( + lookup_anti_affinity_sleds_query(other_instance_id,) + .get_results_async::<(model::AffinityPolicy, Uuid)>(&*conn) + .await + .unwrap(), + vec![] + ); + + // If we make another group (note the policy is different this time!) + // it'll also be reflected in the results. + let group = make_anti_affinity_group( + project_id, + "group-fail", + external::AffinityPolicy::Fail, + &conn, + ) + .await + .unwrap(); + make_anti_affinity_group_instance_membership( + AntiAffinityGroupUuid::from_untyped_uuid(group.id()), + our_instance_id, + &conn, + ) + .await + .unwrap(); + make_anti_affinity_group_instance_membership( + AntiAffinityGroupUuid::from_untyped_uuid(group.id()), + other_instance_id, + &conn, + ) + .await + .unwrap(); + + // We see the outcome of both groups, with a different policy for each. + let mut results = lookup_anti_affinity_sleds_query(our_instance_id) + .get_results_async::<(model::AffinityPolicy, Uuid)>(&*conn) + .await + .unwrap(); + results.sort(); + assert_eq!( + results, + vec![ + (model::AffinityPolicy::Fail, sled_id.into_untyped_uuid()), + (model::AffinityPolicy::Allow, sled_id.into_untyped_uuid()), + ], + ); + + // Let's add one more group, to see what happens to the query output. 
+ let group = make_anti_affinity_group( + project_id, + "group-fail2", + external::AffinityPolicy::Fail, + &conn, + ) + .await + .unwrap(); + make_anti_affinity_group_instance_membership( + AntiAffinityGroupUuid::from_untyped_uuid(group.id()), + our_instance_id, + &conn, + ) + .await + .unwrap(); + make_anti_affinity_group_instance_membership( + AntiAffinityGroupUuid::from_untyped_uuid(group.id()), + other_instance_id, + &conn, + ) + .await + .unwrap(); + let mut results = lookup_anti_affinity_sleds_query(our_instance_id) + .get_results_async::<(model::AffinityPolicy, Uuid)>(&*conn) + .await + .unwrap(); + results.sort(); + + // Since we use "SELECT DISTINCT", the results are bounded, and do not + // grow as we keep on finding more sleds that have the same policy. + assert_eq!( + results, + vec![ + (model::AffinityPolicy::Fail, sled_id.into_untyped_uuid()), + (model::AffinityPolicy::Allow, sled_id.into_untyped_uuid()), + ], + ); + + db.terminate().await; + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/queries/mod.rs b/nexus/db-queries/src/db/queries/mod.rs index 5f34c7cfb3..f2af3aebc9 100644 --- a/nexus/db-queries/src/db/queries/mod.rs +++ b/nexus/db-queries/src/db/queries/mod.rs @@ -5,6 +5,7 @@ //! Specialized queries for inserting database records, usually to maintain //! complex invariants that are most accurately expressed in a single query. +pub mod affinity; pub mod disk; pub mod external_ip; pub mod ip_pool; diff --git a/nexus/db-queries/src/policy_test/resource_builder.rs b/nexus/db-queries/src/policy_test/resource_builder.rs index b6d7d97553..310c11adf3 100644 --- a/nexus/db-queries/src/policy_test/resource_builder.rs +++ b/nexus/db-queries/src/policy_test/resource_builder.rs @@ -243,6 +243,8 @@ macro_rules! impl_dyn_authorized_resource_for_resource { } impl_dyn_authorized_resource_for_resource!(authz::AddressLot); +impl_dyn_authorized_resource_for_resource!(authz::AffinityGroup); +impl_dyn_authorized_resource_for_resource!(authz::AntiAffinityGroup); impl_dyn_authorized_resource_for_resource!(authz::Blueprint); impl_dyn_authorized_resource_for_resource!(authz::Certificate); impl_dyn_authorized_resource_for_resource!(authz::DeviceAccessToken); diff --git a/nexus/db-queries/src/policy_test/resources.rs b/nexus/db-queries/src/policy_test/resources.rs index 6ee92e167c..0465541053 100644 --- a/nexus/db-queries/src/policy_test/resources.rs +++ b/nexus/db-queries/src/policy_test/resources.rs @@ -300,6 +300,21 @@ async fn make_project( LookupType::ByName(vpc1_name.clone()), ); + let affinity_group_name = format!("{}-affinity-group1", project_name); + let affinity_group = authz::AffinityGroup::new( + project.clone(), + Uuid::new_v4(), + LookupType::ByName(affinity_group_name.clone()), + ); + + let anti_affinity_group_name = + format!("{}-anti-affinity-group1", project_name); + let anti_affinity_group = authz::AntiAffinityGroup::new( + project.clone(), + Uuid::new_v4(), + LookupType::ByName(anti_affinity_group_name.clone()), + ); + let instance_name = format!("{}-instance1", project_name); let instance = authz::Instance::new( project.clone(), @@ -313,6 +328,8 @@ async fn make_project( Uuid::new_v4(), LookupType::ByName(disk_name.clone()), )); + builder.new_resource(affinity_group.clone()); + builder.new_resource(anti_affinity_group.clone()); builder.new_resource(instance.clone()); builder.new_resource(authz::InstanceNetworkInterface::new( instance, diff --git a/nexus/db-queries/tests/output/authz-roles.out b/nexus/db-queries/tests/output/authz-roles.out index 
4b24e649cc..e0d43250d1 100644 --- a/nexus/db-queries/tests/output/authz-roles.out +++ b/nexus/db-queries/tests/output/authz-roles.out @@ -306,6 +306,34 @@ resource: Disk "silo1-proj1-disk1" silo1-proj1-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! +resource: AffinityGroup "silo1-proj1-affinity-group1" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-collaborator ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + silo1-proj1-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-proj1-collaborator ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-proj1-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + unauthenticated ! ! ! ! ! ! ! ! + +resource: AntiAffinityGroup "silo1-proj1-anti-affinity-group1" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-collaborator ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + silo1-proj1-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-proj1-collaborator ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-proj1-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + unauthenticated ! ! ! ! ! ! ! ! + resource: Instance "silo1-proj1-instance1" USER Q R LC RP M MP CC D @@ -474,6 +502,34 @@ resource: Disk "silo1-proj2-disk1" silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! +resource: AffinityGroup "silo1-proj2-affinity-group1" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-collaborator ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + silo1-proj1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + unauthenticated ! ! ! ! ! ! ! ! + +resource: AntiAffinityGroup "silo1-proj2-anti-affinity-group1" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-collaborator ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ + silo1-viewer ✘ ✔ ✔ ✔ ✘ ✘ ✘ ✘ + silo1-proj1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + unauthenticated ! ! ! ! ! ! ! ! + resource: Instance "silo1-proj2-instance1" USER Q R LC RP M MP CC D @@ -810,6 +866,34 @@ resource: Disk "silo2-proj1-disk1" silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! +resource: AffinityGroup "silo2-proj1-affinity-group1" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + unauthenticated ! ! ! ! ! ! ! ! + +resource: AntiAffinityGroup "silo2-proj1-anti-affinity-group1" + + USER Q R LC RP M MP CC D + fleet-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + fleet-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-admin ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-collaborator ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ + unauthenticated ! ! ! ! ! ! ! ! 
+ resource: Instance "silo2-proj1-instance1" USER Q R LC RP M MP CC D diff --git a/nexus/db-queries/tests/output/lookup_affinity_sleds_query.sql b/nexus/db-queries/tests/output/lookup_affinity_sleds_query.sql new file mode 100644 index 0000000000..1435a0b8f6 --- /dev/null +++ b/nexus/db-queries/tests/output/lookup_affinity_sleds_query.sql @@ -0,0 +1,30 @@ +WITH + our_groups AS (SELECT group_id FROM affinity_group_instance_membership WHERE instance_id = $1), + other_instances + AS ( + SELECT + affinity_group_instance_membership.group_id, instance_id + FROM + affinity_group_instance_membership + JOIN our_groups ON affinity_group_instance_membership.group_id = our_groups.group_id + WHERE + instance_id != $2 + ), + other_instances_by_policy + AS ( + SELECT + policy, instance_id + FROM + other_instances + JOIN affinity_group ON + affinity_group.id = other_instances.group_id AND affinity_group.failure_domain = 'sled' + WHERE + affinity_group.time_deleted IS NULL + ) +SELECT + DISTINCT policy, sled_id +FROM + other_instances_by_policy + JOIN sled_resource ON + sled_resource.instance_id = other_instances_by_policy.instance_id + AND sled_resource.kind = 'instance' diff --git a/nexus/db-queries/tests/output/lookup_anti_affinity_sleds_query.sql b/nexus/db-queries/tests/output/lookup_anti_affinity_sleds_query.sql new file mode 100644 index 0000000000..d5e7cd66d8 --- /dev/null +++ b/nexus/db-queries/tests/output/lookup_anti_affinity_sleds_query.sql @@ -0,0 +1,32 @@ +WITH + our_groups + AS (SELECT group_id FROM anti_affinity_group_instance_membership WHERE instance_id = $1), + other_instances + AS ( + SELECT + anti_affinity_group_instance_membership.group_id, instance_id + FROM + anti_affinity_group_instance_membership + JOIN our_groups ON anti_affinity_group_instance_membership.group_id = our_groups.group_id + WHERE + instance_id != $2 + ), + other_instances_by_policy + AS ( + SELECT + policy, instance_id + FROM + other_instances + JOIN anti_affinity_group ON + anti_affinity_group.id = other_instances.group_id + AND anti_affinity_group.failure_domain = 'sled' + WHERE + anti_affinity_group.time_deleted IS NULL + ) +SELECT + DISTINCT policy, sled_id +FROM + other_instances_by_policy + JOIN sled_resource ON + sled_resource.instance_id = other_instances_by_policy.instance_id + AND sled_resource.kind = 'instance' From a07c46e17e48b0bca2896be593945f4403bc458c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 4 Feb 2025 15:48:01 -0800 Subject: [PATCH 2/4] review feedback --- nexus/db-queries/Cargo.toml | 1 + nexus/db-queries/src/db/datastore/sled.rs | 437 +++++++++++--------- nexus/db-queries/src/db/queries/affinity.rs | 15 +- 3 files changed, 250 insertions(+), 203 deletions(-) diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index db2b70488d..f51629f63c 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -25,6 +25,7 @@ futures.workspace = true internal-dns-resolver.workspace = true internal-dns-types.workspace = true ipnetwork.workspace = true +itertools.workspace = true macaddr.workspace = true once_cell.workspace = true oxnet.workspace = true diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index b402cade0f..bb61d8eeba 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -29,6 +29,8 @@ use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use itertools::Either; +use 
itertools::Itertools;
 use nexus_db_model::ApplySledFilterExt;
 use nexus_types::deployment::SledFilter;
 use nexus_types::external_api::views::SledPolicy;
@@ -46,6 +48,7 @@ use omicron_uuid_kinds::GenericUuid;
 use omicron_uuid_kinds::InstanceUuid;
 use omicron_uuid_kinds::PropolisUuid;
 use omicron_uuid_kinds::SledUuid;
+use slog::Logger;
 use std::collections::HashSet;
 use std::fmt;
 use strum::IntoEnumIterator;
@@ -54,16 +57,62 @@ use uuid::Uuid;
 
 #[derive(Debug, thiserror::Error)]
 enum SledReservationError {
-    #[error("No reservation could be found")]
+    #[error(
+        "Could not find any valid sled on which this instance can be placed"
+    )]
     NotFound,
-    #[error("More than one sled was found with 'affinity = required'")]
+    #[error("This instance belongs to an affinity group that requires it be placed \
+        on more than one sled. Instances can only placed on a single sled, so \
+        this is impossible to satisfy - Consider stopping other instances in \
+        the affinity group.")]
     TooManyAffinityConstraints,
-    #[error("A sled that is required is also considered banned by anti-affinity rules")]
+    #[error(
+        "This instance belongs to an affinity group that requires it to be \
+        placed on a sled, but also belongs to an anti-affinity group that \
+        prevents it from being placed on that sled. These constraints are \
+        contradictory: Consider stopping instances in those \
+        affinity/anti-affinity groups, or changing group membership."
+    )]
     ConflictingAntiAndAffinityConstraints,
-    #[error("A sled required by an affinity group is not a valid target for other reasons")]
+    #[error("This instance must be placed on a specific sled due to affinity \
+        rules, but that placement is invalid for some other reason (e.g., \
+        the instance would not fit, or the sled sharing an instance in the \
+        affinity group is being expunged). Consider stopping other \
+        instances in the affinity group, or changing affinity group \
+        membership.")]
     RequiredAffinitySledNotValid,
 }
 
+impl From<SledReservationError> for external::Error {
+    fn from(err: SledReservationError) -> Self {
+        let msg = format!("Failed to place instance: {err}");
+        match err {
+            // "NotFound" can be resolved by adding more capacity
+            SledReservationError::NotFound
+            // "RequiredAffinitySledNotValid" is *usually* the result of a
+            // single sled filling up with several members of an Affinity group.
+            //
+            // It is also possible briefly when a sled with affinity groups is
+            // being expunged with running VMMs, but "insufficient capacity" is
+            // the much more common case.
+            //
+            // (Disambiguating these cases would require additional database
+            // queries, hence why this isn't being done right now)
+            | SledReservationError::RequiredAffinitySledNotValid => {
+                external::Error::insufficient_capacity(&msg, &msg)
+            },
+            // The following cases are constraint violations due to excessive
+            // affinity/anti-affinity groups -- even if additional capacity is
+            // added, they won't be fixed. Return a 400 error to signify to the
+            // caller that they're responsible for fixing these constraints.
+            SledReservationError::TooManyAffinityConstraints
+            | SledReservationError::ConflictingAntiAndAffinityConstraints => {
+                external::Error::invalid_request(&msg)
+            },
+        }
+    }
+}
+
 #[derive(Debug, thiserror::Error)]
 enum SledReservationTransactionError {
     #[error(transparent)]
@@ -74,6 +123,166 @@ enum SledReservationTransactionError {
     Reservation(#[from] SledReservationError),
 }
 
+// Chooses a sled for reservation with the supplied constraints.
+//
+// - "targets": All possible sleds which might be selected.
+// - "anti_affinity_sleds": All sleds which are anti-affine to
+//   our requested reservation.
+// - "affinity_sleds": All sleds which are affine to our requested
+//   reservation.
+//
+// We use the following logic to calculate a desirable sled, given a possible
+// set of "targets", and the information from affinity groups.
+//
+// # Rules vs Preferences
+//
+// Due to the flavors of "affinity policy", it's possible to bucket affinity
+// choices into two categories: "rules" and "preferences". "rules" are affinity
+// dispositions for or against sled placement that must be followed, and
+// "preferences" are affinity dispositions that should be followed for sled
+// selection, in order of "most preferential" to "least preferential".
+//
+// An example of a "rule" is "an anti-affinity group exists, containing a
+// target sled, with affinity_policy = 'fail'".
+//
+// An example of a "preference" is "an anti-affinity group exists, containing a
+// target sled, but the policy is 'allow'." We don't want to use it as a target,
+// but we will if there are no other choices.
+//
+// We apply rules before preferences to ensure they are always respected.
+// Furthermore, the evaluation of preferences is a target-seeking operation,
+// which identifies the distinct sets of targets, and searches them in
+// decreasing preference order.
+//
+// # Logic
+//
+// ## Background: Notation
+//
+// We use the following symbols for sets below:
+// - ∩: Intersection of two sets (A ∩ B is "everything that exists in A and
+//   also exists in B").
+// - ∖: Difference of two sets (A ∖ B is "everything that exists in A that does
+//   not exist in B").
+//
+// We also use the following notation for brevity:
+// - AA,P=Fail: All sleds containing instances that are part of an anti-affinity
+//   group with policy = 'fail'.
+// - AA,P=Allow: Same as above, but with policy = 'allow'.
+// - A,P=Fail: All sleds containing instances that are part of an affinity group
+//   with policy = 'fail'.
+// - A,P=Allow: Same as above, but with policy = 'allow'.
+//
+// ## Affinity: Apply Rules
+//
+// - Targets := All viable sleds for instance placement
+// - Banned := AA,P=Fail
+// - Required := A,P=Fail
+// - if Required.len() > 1: Fail (too many constraints).
+// - if Required.len() == 1...
+//   - ... if the entry exists in the "Banned" set: Fail
+//     (contradicting constraints 'Banned' + 'Required')
+//   - ... if the entry does not exist in "Targets": Fail
+//     ('Required' constraint not satisfiable)
+//   - ... if the entry does not exist in "Banned": Use it.
+//
+// If we have not yet picked a target, we can filter the set of targets to
+// ignore "banned" sleds, and then apply preferences.
+//
+// - Targets := Targets ∖ Banned
+//
+// ## Affinity: Apply Preferences
+//
+// - Preferred := Targets ∩ A,P=Allow
+// - Unpreferred := Targets ∩ AA,P=Allow
+// - Both := Preferred ∩ Unpreferred
+// - Preferred := Preferred ∖ Both
+// - Unpreferred := Unpreferred ∖ Both
+// - If Preferred isn't empty, pick a target from it.
+// - Targets := Targets ∖ Unpreferred
+// - If Targets isn't empty, pick a target from it.
+// - If Unpreferred isn't empty, pick a target from it.
+// - Fail, no targets are available.
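+//
+// ## Worked Example
+//
+// For illustration only (a hypothetical walk-through, with made-up sled
+// names): suppose Targets := {s1, s2, s3}, AA,P=Fail := {s1},
+// A,P=Fail := {}, AA,P=Allow := {s2}, and A,P=Allow := {s3}.
+//
+// Applying rules: Required := {} (no placement is forced) and
+// Banned := {s1}, so Targets := {s2, s3}.
+//
+// Applying preferences: Preferred := {s3}, Unpreferred := {s2}, and
+// Both := {}. Preferred is non-empty, so s3 is chosen.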
+fn pick_sled_reservation_target(
+    log: &Logger,
+    mut targets: HashSet<SledUuid>,
+    anti_affinity_sleds: Vec<(AffinityPolicy, Uuid)>,
+    affinity_sleds: Vec<(AffinityPolicy, Uuid)>,
+) -> Result<SledUuid, SledReservationError> {
+    let (banned, mut unpreferred): (HashSet<_>, HashSet<_>) =
+        anti_affinity_sleds.into_iter().partition_map(|(policy, id)| {
+            let id = SledUuid::from_untyped_uuid(id);
+            match policy {
+                AffinityPolicy::Fail => Either::Left(id),
+                AffinityPolicy::Allow => Either::Right(id),
+            }
+        });
+    let (required, mut preferred): (HashSet<_>, HashSet<_>) =
+        affinity_sleds.into_iter().partition_map(|(policy, id)| {
+            let id = SledUuid::from_untyped_uuid(id);
+            match policy {
+                AffinityPolicy::Fail => Either::Left(id),
+                AffinityPolicy::Allow => Either::Right(id),
+            }
+        });
+
+    if !banned.is_empty() {
+        info!(
+            log,
+            "anti-affinity policy prohibits placement on {} sleds", banned.len();
+            "banned" => ?banned,
+        );
+    }
+    if !required.is_empty() {
+        info!(
+            log,
+            "affinity policy requires placement on {} sleds", required.len();
+            "required" => ?required,
+        );
+    }
+
+    if required.len() > 1 {
+        return Err(SledReservationError::TooManyAffinityConstraints);
+    }
+    if let Some(required_id) = required.iter().next() {
+        // If we have a "required" sled, it must be chosen.
+        if banned.contains(required_id) {
+            return Err(
+                SledReservationError::ConflictingAntiAndAffinityConstraints,
+            );
+        }
+        if !targets.contains(required_id) {
+            return Err(SledReservationError::RequiredAffinitySledNotValid);
+        }
+        return Ok(*required_id);
+    }
+
+    // We have no "required" sleds, but might have preferences.
+    targets = targets.difference(&banned).cloned().collect();
+
+    // Only consider "preferred" sleds that are viable targets
+    preferred = targets.intersection(&preferred).cloned().collect();
+    // Only consider "unpreferred" sleds that are viable targets
+    unpreferred = targets.intersection(&unpreferred).cloned().collect();
+
+    // If a target is both preferred and unpreferred, it is removed
+    // from both sets.
+    let both = preferred.intersection(&unpreferred).cloned().collect();
+    preferred = preferred.difference(&both).cloned().collect();
+    unpreferred = unpreferred.difference(&both).cloned().collect();
+
+    if let Some(target) = preferred.iter().next() {
+        return Ok(*target);
+    }
+    targets = targets.difference(&unpreferred).cloned().collect();
+    if let Some(target) = targets.iter().next() {
+        return Ok(*target);
+    }
+    if let Some(target) = unpreferred.iter().next() {
+        return Ok(*target);
+    }
+    return Err(SledReservationError::NotFound);
+}
+
 impl DataStore {
     /// Stores a new sled in the database.
     ///
@@ -231,18 +440,7 @@ impl DataStore {
                 SledReservationTransactionError::Diesel(e) => {
                     public_error_from_diesel(e, ErrorHandler::Server)
                 }
-                SledReservationTransactionError::Reservation(e) => match e {
-                    SledReservationError::NotFound
-                    | SledReservationError::TooManyAffinityConstraints
-                    | SledReservationError::ConflictingAntiAndAffinityConstraints
-                    | SledReservationError::RequiredAffinitySledNotValid => {
-                        return external::Error::insufficient_capacity(
-                            "No sleds can fit the requested instance",
-                            "No sled targets found that had enough \
-                             capacity to fit the requested instance.",
-                        );
-                    }
-                },
+                SledReservationTransactionError::Reservation(e) => e.into(),
             })
     }
 
@@ -367,177 +565,26 @@ impl DataStore {
                         instance_id,
                     ).get_results_async::<(AffinityPolicy, Uuid)>(&conn).await?;
 
-                    // We use the following logic to calculate a desirable sled,
-                    // given a possible set of "targets", and the information
-                    // from affinity groups.
- // - // # Rules vs Preferences - // - // Due to the flavors "affinity policy", it's possible to bucket affinity - // choices into two categories: "rules" and "preferences". "rules" are affinity - // dispositions for or against sled placement that must be followed, and - // "preferences" are affinity dispositions that should be followed for sled - // selection, in order of "most preferential" to "least preferential". - // - // As example of a "rule" is "an anti-affinity group exists, containing a - // target sled, with affinity_policy = 'fail'". - // - // An example of a "preference" is "an anti-affinity group exists, containing a - // target sled, but the policy is 'allow'. We don't want to use it as a target, - // but we will if there are no other choices." - // - // We apply rules before preferences to ensure they are always respected. - // Furthermore, the evaluation of preferences is a target-seeking operation, - // which identifies the distinct sets of targets, and searches them in - // decreasing preference order. - // - // # Logic - // - // ## Background: Notation - // - // We use the following symbols for sets below: - // - ∩: Intersection of two sets (A ∩ B is "everything that exists in A and - // also exists in B"). - // - \: difference of two sets (A \ B is "everything that exists in A that does - // not exist in B). - // - // We also use the following notation for brevity: - // - AA,P=Fail: All sleds sharing an anti-affinity instance within a group with - // policy = 'fail'. - // - AA,P=Allow: Same as above, but with policy = 'allow'. - // - A,P=Fail: All sleds sharing an affinity instance within a group with - // policy = 'fail'. - // - A,P=Allow: Same as above, but with policy = 'allow'. - // - // ## Affinity: Apply Rules - // - // - Targets := All viable sleds for instance placement - // - Banned := AA,P=Fail - // - Required := A,P=Fail - // - if Required.len() > 1: Fail (too many constraints). - // - if Required.len() == 1... - // - ... if the entry exists in the "Banned" set: Fail - // (contradicting constraints 'Banned' + 'Required') - // - ... if the entry does not exist in "Targets": Fail - // ('Required' constraint not satisfiable) - // - ... if the entry does not exist in "Banned": Use it. - // - // If we have not yet picked a target, we can filter the - // set of targets to ignore "banned" sleds, and then apply - // preferences. - // - // - Targets := Targets \ Banned - // - // ## Affinity: Apply Preferences - // - // - Preferred := Targets ∩ A,P=Allow - // - Unpreferred := Targets ∩ AA,P=Allow - // - Neither := Preferred ∩ Unpreferred - // - Preferred := Preferred \ Neither - // - Unpreferred := Unpreferred \ Neither - // - If Preferred isn't empty, pick a target from it. - // - Targets := Targets \ Unpreferred - // - If Targets isn't empty, pick a target from it. - // - If Unpreferred isn't empty, pick a target from it. - // - Fail, no targets are available. 
-
-                    let mut targets: HashSet<_> = sled_targets.into_iter().collect();
-
-                    let banned = anti_affinity_sleds.iter().filter_map(|(policy, id)| {
-                        if *policy == AffinityPolicy::Fail {
-                            Some(*id)
-                        } else {
-                            None
-                        }
-                    }).collect::<HashSet<_>>();
-                    let required = affinity_sleds.iter().filter_map(|(policy, id)| {
-                        if *policy == AffinityPolicy::Fail {
-                            Some(*id)
-                        } else {
-                            None
-                        }
-                    }).collect::<HashSet<_>>();
-
-                    if !banned.is_empty() {
-                        info!(
-                            opctx.log,
-                            "affinity policy prohibits placement on {} sleds", banned.len();
-                            "banned" => ?banned,
-                        );
-                    }
-                    if !required.is_empty() {
-                        info!(
-                            opctx.log,
-                            "affinity policy requires placement on {} sleds", required.len();
-                            "required" => ?required,
-                        );
-                    }
-
-                    let sled_target = if required.len() > 1 {
-                        return Err(err.bail(SledReservationError::TooManyAffinityConstraints));
-                    } else if let Some(required_id) = required.iter().next() {
-                        // If we have a "required" sled, it must be chosen.
-
-                        if banned.contains(&required_id) {
-                            return Err(err.bail(SledReservationError::ConflictingAntiAndAffinityConstraints));
-                        }
-                        if !targets.contains(&required_id) {
-                            return Err(err.bail(SledReservationError::RequiredAffinitySledNotValid));
-                        }
-                        *required_id
-                    } else {
-                        // We have no "required" sleds, but might have preferences.
+            let targets: HashSet<SledUuid> = sled_targets
+                .into_iter()
+                .map(|id| SledUuid::from_untyped_uuid(id))
+                .collect();
 
-                        targets = targets.difference(&banned).cloned().collect();
-
-                        let mut preferred = affinity_sleds.iter().filter_map(|(policy, id)| {
-                            if *policy == AffinityPolicy::Allow {
-                                Some(*id)
-                            } else {
-                                None
-                            }
-                        }).collect::<HashSet<_>>();
-                        let mut unpreferred = anti_affinity_sleds.iter().filter_map(|(policy, id)| {
-                            if *policy == AffinityPolicy::Allow {
-                                Some(*id)
-                            } else {
-                                None
-                            }
-                        }).collect::<HashSet<_>>();
-
-                        // Only consider "preferred" sleds that are viable targets
-                        preferred = targets.intersection(&preferred).cloned().collect();
-                        // Only consider "unpreferred" sleds that are viable targets
-                        unpreferred = targets.intersection(&unpreferred).cloned().collect();
-
-                        // If a target is both preferred and unpreferred, it is removed
-                        // from both sets.
-                        let both = preferred.intersection(&unpreferred).cloned().collect();
-                        preferred = preferred.difference(&both).cloned().collect();
-                        unpreferred = unpreferred.difference(&both).cloned().collect();
-
-                        if let Some(target) = preferred.iter().next() {
-                            *target
-                        } else {
-                            targets = targets.difference(&unpreferred).cloned().collect();
-                            if let Some(target) = targets.iter().next() {
-                                *target
-                            } else {
-                                if let Some(target) = unpreferred.iter().next() {
-                                    *target
-                                } else {
-                                    return Err(err.bail(SledReservationError::NotFound));
-                                }
-                            }
-                        }
-                    };
+            let sled_target = pick_sled_reservation_target(
+                &opctx.log,
+                targets,
+                anti_affinity_sleds,
+                affinity_sleds,
+            ).map_err(|e| {
+                err.bail(e)
+            })?;
 
                     // Create a SledResource record, associate it with the target
                     // sled.
                     let resource = SledResource::new_for_vmm(
                         propolis_id,
                         instance_id,
-                        SledUuid::from_untyped_uuid(sled_target),
+                        sled_target,
                         resources,
                     );
 
@@ -1082,6 +1129,8 @@ pub(in crate::db::datastore) mod test {
     use nexus_types::identity::Resource;
     use omicron_common::api::external;
     use omicron_test_utils::dev;
+    use omicron_uuid_kinds::AffinityGroupUuid;
+    use omicron_uuid_kinds::AntiAffinityGroupUuid;
     use omicron_uuid_kinds::GenericUuid;
     use omicron_uuid_kinds::PhysicalDiskUuid;
     use omicron_uuid_kinds::SledUuid;
@@ -1451,20 +1500,16 @@ pub(in crate::db::datastore) mod test {
     // creating affinity groups, but makes testing easier.
async fn add_instance_to_anti_affinity_group( datastore: &DataStore, - group_id: Uuid, - instance_id: Uuid, + group_id: AntiAffinityGroupUuid, + instance_id: InstanceUuid, ) { use db::model::AntiAffinityGroupInstanceMembership; use db::schema::anti_affinity_group_instance_membership::dsl as membership_dsl; - use omicron_uuid_kinds::AntiAffinityGroupUuid; diesel::insert_into( membership_dsl::anti_affinity_group_instance_membership, ) - .values(AntiAffinityGroupInstanceMembership::new( - AntiAffinityGroupUuid::from_untyped_uuid(group_id), - InstanceUuid::from_untyped_uuid(instance_id), - )) + .values(AntiAffinityGroupInstanceMembership::new(group_id, instance_id)) .on_conflict((membership_dsl::group_id, membership_dsl::instance_id)) .do_nothing() .execute_async(&*datastore.pool_connection_for_tests().await.unwrap()) @@ -1503,18 +1548,14 @@ pub(in crate::db::datastore) mod test { // creating affinity groups, but makes testing easier. async fn add_instance_to_affinity_group( datastore: &DataStore, - group_id: Uuid, - instance_id: Uuid, + group_id: AffinityGroupUuid, + instance_id: InstanceUuid, ) { use db::model::AffinityGroupInstanceMembership; use db::schema::affinity_group_instance_membership::dsl as membership_dsl; - use omicron_uuid_kinds::AffinityGroupUuid; diesel::insert_into(membership_dsl::affinity_group_instance_membership) - .values(AffinityGroupInstanceMembership::new( - AffinityGroupUuid::from_untyped_uuid(group_id), - InstanceUuid::from_untyped_uuid(instance_id), - )) + .values(AffinityGroupInstanceMembership::new(group_id, instance_id)) .on_conflict(( membership_dsl::group_id, membership_dsl::instance_id, @@ -1667,16 +1708,16 @@ pub(in crate::db::datastore) mod test { Affinity::Positive => { add_instance_to_affinity_group( &datastore, - *group_id, - self.id.into_untyped_uuid(), + AffinityGroupUuid::from_untyped_uuid(*group_id), + self.id, ) .await } Affinity::Negative => { add_instance_to_anti_affinity_group( &datastore, - *group_id, - self.id.into_untyped_uuid(), + AntiAffinityGroupUuid::from_untyped_uuid(*group_id), + self.id, ) .await } diff --git a/nexus/db-queries/src/db/queries/affinity.rs b/nexus/db-queries/src/db/queries/affinity.rs index 6da857669d..24806fe85d 100644 --- a/nexus/db-queries/src/db/queries/affinity.rs +++ b/nexus/db-queries/src/db/queries/affinity.rs @@ -10,9 +10,11 @@ use diesel::sql_types; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; -/// For an instance, look up all anti-affinity groups, and return -/// a list of sleds with other instances in that anti-affinity group -/// already reserved. +/// For an instance, look up all anti-affinity groups it belongs to. +/// For all those groups, find all instances with reservations, and look +/// up their sleds. +/// Return all the sleds on which those instances were found, along with +/// policy information about the corresponding group. pub fn lookup_anti_affinity_sleds_query( instance_id: InstanceUuid, ) -> TypedSqlQuery<(AffinityPolicyEnum, sql_types::Uuid)> { @@ -49,8 +51,11 @@ pub fn lookup_anti_affinity_sleds_query( .query() } -/// For an instance, look up all affinity groups, and return a list of sleds -/// with other instances in that affinity group already reserved. +/// For an instance, look up all affinity groups it belongs to. +/// For all those groups, find all instances with reservations, and look +/// up their sleds. +/// Return all the sleds on which those instances were found, along with +/// policy information about the corresponding group. 
pub fn lookup_affinity_sleds_query(
     instance_id: InstanceUuid,
 ) -> TypedSqlQuery<(AffinityPolicyEnum, sql_types::Uuid)> {

From fdccd6bd041f501d4e03d5850e161b726b284881 Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Wed, 5 Feb 2025 11:31:17 -0800
Subject: [PATCH 3/4] review feedback (grammatical)

---
 nexus/db-queries/src/db/datastore/sled.rs | 31 +++++++++++++----------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs
index bb61d8eeba..217ab71718 100644
--- a/nexus/db-queries/src/db/datastore/sled.rs
+++ b/nexus/db-queries/src/db/datastore/sled.rs
@@ -61,25 +61,28 @@ enum SledReservationError {
         "Could not find any valid sled on which this instance can be placed"
     )]
     NotFound,
-    #[error("This instance belongs to an affinity group that requires it be placed \
-        on more than one sled. Instances can only placed on a single sled, so \
-        this is impossible to satisfy - Consider stopping other instances in \
-        the affinity group.")]
+    #[error(
+        "This instance belongs to an affinity group that requires it be placed \
+        on more than one sled. Instances can only be placed on a single sled, so \
+        this is impossible to satisfy. Consider stopping other instances in \
+        the affinity group."
+    )]
     TooManyAffinityConstraints,
     #[error(
         "This instance belongs to an affinity group that requires it to be \
-        placed on a sled, but also belongs to an anti-affinity group that \
-        prevents it from being placed on that sled. These constraints are \
-        contradictory: Consider stopping instances in those \
-        affinity/anti-affinity groups, or changing group membership."
+        placed on a sled, but also belongs to an anti-affinity group that \
+        prevents it from being placed on that sled. These constraints are \
+        contradictory. Consider stopping instances in those \
+        affinity/anti-affinity groups, or changing group membership."
     )]
     ConflictingAntiAndAffinityConstraints,
-    #[error("This instance must be placed on a specific sled due to affinity \
-        rules, but that placement is invalid for some other reason (e.g., \
-        the instance would not fit, or the sled sharing an instance in the \
-        affinity group is being expunged). Consider stopping other \
-        instances in the affinity group, or changing affinity group \
-        membership.")]
+    #[error(
+        "This instance must be placed on a specific sled to co-locate it \
+        with another instance in its affinity group, but that sled cannot \
+        currently accept this instance. Consider stopping other instances \
+        in this instance's affinity groups, or changing its affinity \
+        group membership."
+    )]
     RequiredAffinitySledNotValid,
 }
 

From 97b9b2e7599d65347a4b31ea24f457f63cfd9f49 Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Wed, 5 Feb 2025 16:16:22 -0800
Subject: [PATCH 4/4] Eliza's feedback

---
 nexus/db-queries/src/db/datastore/sled.rs | 24 +++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs
index 217ab71718..9621515e17 100644
--- a/nexus/db-queries/src/db/datastore/sled.rs
+++ b/nexus/db-queries/src/db/datastore/sled.rs
@@ -267,21 +267,25 @@ fn pick_sled_reservation_target(
     // Only consider "unpreferred" sleds that are viable targets
     unpreferred = targets.intersection(&unpreferred).cloned().collect();
 
-    // If a target is both preferred and unpreferred, it is removed
-    // from both sets.
+    // If a target is both preferred and unpreferred, it is not considered
+    // a part of either set.
     let both = preferred.intersection(&unpreferred).cloned().collect();
-    preferred = preferred.difference(&both).cloned().collect();
-    unpreferred = unpreferred.difference(&both).cloned().collect();
 
-    if let Some(target) = preferred.iter().next() {
-        return Ok(*target);
+    // Grab a preferred target (which isn't also unpreferred) if one exists.
+    if let Some(target) = preferred.difference(&both).cloned().next() {
+        return Ok(target);
     }
+
+    unpreferred = unpreferred.difference(&both).cloned().collect();
     targets = targets.difference(&unpreferred).cloned().collect();
-    if let Some(target) = targets.iter().next() {
-        return Ok(*target);
+
+    // Grab a target which is not in the unpreferred set, if one exists.
+    if let Some(target) = targets.iter().cloned().next() {
+        return Ok(target);
     }
-    if let Some(target) = unpreferred.iter().next() {
-        return Ok(*target);
+
+    // Grab a target from the unpreferred set, if one exists.
+    if let Some(target) = unpreferred.iter().cloned().next() {
+        return Ok(target);
     }
     return Err(SledReservationError::NotFound);
 }
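To make the final preference ordering concrete, here is a minimal,
self-contained Rust sketch. It is not part of the patch: "pick_preferred",
the u32 sled IDs, and the sample sets below are invented for illustration.
It mirrors the search order that PATCH 4 settles on (preferred sleds first,
then sleds with no preference either way, then unpreferred sleds as a last
resort), including the rule that a sled appearing in both the preferred and
unpreferred sets counts as neither.

use std::collections::HashSet;

// Mirrors the post-"rules" preference search: preferred sleds win, then
// neutral sleds, then unpreferred sleds. A sled that is both preferred
// and unpreferred counts as neither (it stays in the neutral pool).
fn pick_preferred(
    targets: &HashSet<u32>,
    preferred: &HashSet<u32>,
    unpreferred: &HashSet<u32>,
) -> Option<u32> {
    // Only consider preferences that are viable targets.
    let preferred: HashSet<u32> =
        targets.intersection(preferred).copied().collect();
    let unpreferred: HashSet<u32> =
        targets.intersection(unpreferred).copied().collect();
    let both: HashSet<u32> =
        preferred.intersection(&unpreferred).copied().collect();

    // 1. A preferred sled that is not also unpreferred wins outright.
    if let Some(t) = preferred.difference(&both).next() {
        return Some(*t);
    }
    // 2. Otherwise, any target outside the strictly-unpreferred set.
    let unpreferred: HashSet<u32> =
        unpreferred.difference(&both).copied().collect();
    if let Some(t) = targets.difference(&unpreferred).next() {
        return Some(*t);
    }
    // 3. Last resort: an unpreferred sled is still better than failing.
    unpreferred.iter().next().copied()
}

fn main() {
    let targets: HashSet<u32> = [1, 2].into();

    // A preferred sled wins over an unpreferred one.
    assert_eq!(pick_preferred(&targets, &[1].into(), &[2].into()), Some(1));
    // With no preference for sled 1, it still beats the unpreferred sled 2.
    assert_eq!(pick_preferred(&targets, &HashSet::new(), &[2].into()), Some(1));
    // If the only viable target is unpreferred, it is chosen anyway.
    assert_eq!(pick_preferred(&[2].into(), &HashSet::new(), &[2].into()), Some(2));

    println!("preference ordering behaves as described");
}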