From ace10ab3c7bed850c9c7dc39e2062ff51485499c Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Fri, 15 Mar 2024 13:25:04 +0000 Subject: [PATCH 01/38] break memory leaks due to cyclic references --- zenoh/src/net/primitives/mux.rs | 28 ++++++++--------- zenoh/src/net/routing/dispatcher/face.rs | 26 +++++++++++++++- zenoh/src/net/routing/dispatcher/resource.rs | 4 ++- .../net/routing/hat/linkstate_peer/network.rs | 20 +++++++------ zenoh/src/net/routing/hat/p2p_peer/gossip.rs | 14 +++++---- zenoh/src/net/routing/router.rs | 2 +- zenoh/src/net/runtime/mod.rs | 30 +++++++++++++++---- zenoh/src/session.rs | 7 +++-- 8 files changed, 91 insertions(+), 40 deletions(-) diff --git a/zenoh/src/net/primitives/mux.rs b/zenoh/src/net/primitives/mux.rs index 442c040624..0aad0d8584 100644 --- a/zenoh/src/net/primitives/mux.rs +++ b/zenoh/src/net/primitives/mux.rs @@ -13,7 +13,7 @@ // use super::{EPrimitives, Primitives}; use crate::net::routing::{ - dispatcher::face::Face, + dispatcher::face::{Face, WeakFace}, interceptor::{InterceptorTrait, InterceptorsChain}, RoutingContext, }; @@ -25,7 +25,7 @@ use zenoh_transport::{multicast::TransportMulticast, unicast::TransportUnicast}; pub struct Mux { pub handler: TransportUnicast, - pub(crate) face: OnceLock, + pub(crate) face: OnceLock, pub(crate) interceptor: InterceptorsChain, } @@ -48,14 +48,14 @@ impl Primitives for Mux { }; if self.interceptor.interceptors.is_empty() { let _ = self.handler.schedule(msg); - } else if let Some(face) = self.face.get() { + } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { let ctx = RoutingContext::new_out(msg, face.clone()); let prefix = ctx .wire_expr() .and_then(|we| (!we.has_suffix()).then(|| ctx.prefix())) .flatten() .cloned(); - let cache = prefix.as_ref().and_then(|p| p.get_egress_cache(face)); + let cache = prefix.as_ref().and_then(|p| p.get_egress_cache(&face)); if let Some(ctx) = self.interceptor.intercept(ctx, cache) { let _ = self.handler.schedule(ctx.msg); } @@ -72,14 +72,14 @@ impl Primitives for Mux { }; if self.interceptor.interceptors.is_empty() { let _ = self.handler.schedule(msg); - } else if let Some(face) = self.face.get() { + } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { let ctx = RoutingContext::new_out(msg, face.clone()); let prefix = ctx .wire_expr() .and_then(|we| (!we.has_suffix()).then(|| ctx.prefix())) .flatten() .cloned(); - let cache = prefix.as_ref().and_then(|p| p.get_egress_cache(face)); + let cache = prefix.as_ref().and_then(|p| p.get_egress_cache(&face)); if let Some(ctx) = self.interceptor.intercept(ctx, cache) { let _ = self.handler.schedule(ctx.msg); } @@ -96,14 +96,14 @@ impl Primitives for Mux { }; if self.interceptor.interceptors.is_empty() { let _ = self.handler.schedule(msg); - } else if let Some(face) = self.face.get() { + } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { let ctx = RoutingContext::new_out(msg, face.clone()); let prefix = ctx .wire_expr() .and_then(|we| (!we.has_suffix()).then(|| ctx.prefix())) .flatten() .cloned(); - let cache = prefix.as_ref().and_then(|p| p.get_egress_cache(face)); + let cache = prefix.as_ref().and_then(|p| p.get_egress_cache(&face)); if let Some(ctx) = self.interceptor.intercept(ctx, cache) { let _ = self.handler.schedule(ctx.msg); } @@ -120,14 +120,14 @@ impl Primitives for Mux { }; if self.interceptor.interceptors.is_empty() { let _ = self.handler.schedule(msg); - } else if let Some(face) = self.face.get() { + } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { let ctx = RoutingContext::new_out(msg, face.clone()); let prefix = ctx .wire_expr() .and_then(|we| (!we.has_suffix()).then(|| ctx.prefix())) .flatten() .cloned(); - let cache = prefix.as_ref().and_then(|p| p.get_egress_cache(face)); + let cache = prefix.as_ref().and_then(|p| p.get_egress_cache(&face)); if let Some(ctx) = self.interceptor.intercept(ctx, cache) { let _ = self.handler.schedule(ctx.msg); } @@ -144,14 +144,14 @@ impl Primitives for Mux { }; if self.interceptor.interceptors.is_empty() { let _ = self.handler.schedule(msg); - } else if let Some(face) = self.face.get() { + } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { let ctx = RoutingContext::new_out(msg, face.clone()); let prefix = ctx .wire_expr() .and_then(|we| (!we.has_suffix()).then(|| ctx.prefix())) .flatten() .cloned(); - let cache = prefix.as_ref().and_then(|p| p.get_egress_cache(face)); + let cache = prefix.as_ref().and_then(|p| p.get_egress_cache(&face)); if let Some(ctx) = self.interceptor.intercept(ctx, cache) { let _ = self.handler.schedule(ctx.msg); } @@ -199,14 +199,14 @@ impl EPrimitives for Mux { }; if self.interceptor.interceptors.is_empty() { let _ = self.handler.schedule(msg); - } else if let Some(face) = self.face.get() { + } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { let ctx = RoutingContext::new_out(msg, face.clone()); let prefix = ctx .wire_expr() .and_then(|we| (!we.has_suffix()).then(|| ctx.prefix())) .flatten() .cloned(); - let cache = prefix.as_ref().and_then(|p| p.get_egress_cache(face)); + let cache = prefix.as_ref().and_then(|p| p.get_egress_cache(&face)); if let Some(ctx) = self.interceptor.intercept(ctx, cache) { let _ = self.handler.schedule(ctx.msg); } diff --git a/zenoh/src/net/routing/dispatcher/face.rs b/zenoh/src/net/routing/dispatcher/face.rs index 6ef5c063d0..9be11ad7d1 100644 --- a/zenoh/src/net/routing/dispatcher/face.rs +++ b/zenoh/src/net/routing/dispatcher/face.rs @@ -20,7 +20,7 @@ use crate::KeyExpr; use std::any::Any; use std::collections::HashMap; use std::fmt; -use std::sync::Arc; +use std::sync::{Arc, Weak}; use zenoh_protocol::zenoh::RequestBody; use zenoh_protocol::{ core::{ExprId, WhatAmI, ZenohId}, @@ -150,12 +150,36 @@ impl fmt::Display for FaceState { } } +#[derive(Clone)] +pub struct WeakFace { + pub(crate) tables: Weak, + pub(crate) state: Weak, +} + +impl WeakFace { + pub fn upgrade(&self) -> Option { + Some(Face { + tables: self.tables.upgrade()?, + state: self.state.upgrade()? + }) + } +} + #[derive(Clone)] pub struct Face { pub(crate) tables: Arc, pub(crate) state: Arc, } +impl Face { + pub fn downgrade(&self) -> WeakFace { + WeakFace { + tables: Arc::downgrade(&self.tables), + state: Arc::downgrade(&self.state) + } + } +} + impl Primitives for Face { fn send_declare(&self, msg: zenoh_protocol::network::Declare) { let ctrl_lock = zlock!(self.tables.ctrl_lock); diff --git a/zenoh/src/net/routing/dispatcher/resource.rs b/zenoh/src/net/routing/dispatcher/resource.rs index 26a498461a..c1a2dc2e2a 100644 --- a/zenoh/src/net/routing/dispatcher/resource.rs +++ b/zenoh/src/net/routing/dispatcher/resource.rs @@ -292,7 +292,8 @@ impl Resource { let mut resclone = res.clone(); let mutres = get_mut_unchecked(&mut resclone); if let Some(ref mut parent) = mutres.parent { - if Arc::strong_count(res) <= 3 && res.childs.is_empty() { + if Arc::strong_count(res) <= 3 && res.childs.is_empty() { + // consider only childless resource held by only one external object (+ 1 strong count for resclone, + 1 strong count for res.parent to a total of 3 ) log::debug!("Unregister resource {}", res.expr()); if let Some(context) = mutres.context.as_mut() { for match_ in &mut context.matches { @@ -306,6 +307,7 @@ impl Resource { } } } + mutres.nonwild_prefix.take(); { get_mut_unchecked(parent).childs.remove(&res.suffix); } diff --git a/zenoh/src/net/routing/hat/linkstate_peer/network.rs b/zenoh/src/net/routing/hat/linkstate_peer/network.rs index 182a721a27..8e2d64c014 100644 --- a/zenoh/src/net/routing/hat/linkstate_peer/network.rs +++ b/zenoh/src/net/routing/hat/linkstate_peer/network.rs @@ -15,6 +15,7 @@ use crate::net::codec::Zenoh080Routing; use crate::net::protocol::linkstate::{LinkState, LinkStateList}; use crate::net::routing::dispatcher::tables::NodeId; use crate::net::runtime::Runtime; +use crate::runtime::WeakRuntime; use petgraph::graph::NodeIndex; use petgraph::visit::{VisitMap, Visitable}; use std::convert::TryInto; @@ -115,7 +116,7 @@ pub(super) struct Network { pub(super) trees: Vec, pub(super) distances: Vec, pub(super) graph: petgraph::stable_graph::StableUnGraph, - pub(super) runtime: Runtime, + pub(super) runtime: WeakRuntime, } impl Network { @@ -155,7 +156,7 @@ impl Network { }], distances: vec![0.0], graph, - runtime, + runtime: Runtime::downgrade(&runtime), } } @@ -247,7 +248,7 @@ impl Network { whatami: self.graph[idx].whatami, locators: if details.locators { if idx == self.idx { - Some(self.runtime.get_locators()) + Some(self.runtime.upgrade().unwrap().get_locators()) } else { self.graph[idx].locators.clone() } @@ -336,6 +337,7 @@ impl Network { pub(super) fn link_states(&mut self, link_states: Vec, src: ZenohId) -> Changes { log::trace!("{} Received from {} raw: {:?}", self.name, src, link_states); + let strong_runtime = self.runtime.upgrade().unwrap(); let graph = &self.graph; let links = &mut self.links; @@ -487,13 +489,13 @@ impl Network { if !self.autoconnect.is_empty() { // Connect discovered peers if zenoh_runtime::ZRuntime::Net - .block_in_place(self.runtime.manager().get_transport_unicast(&zid)) + .block_in_place(strong_runtime.manager().get_transport_unicast(&zid)) .is_none() && self.autoconnect.matches(whatami) { if let Some(locators) = locators { - let runtime = self.runtime.clone(); - self.runtime.spawn(async move { + let runtime = strong_runtime.clone(); + strong_runtime.spawn(async move { // random backoff tokio::time::sleep(std::time::Duration::from_millis( rand::random::() % 100, @@ -607,15 +609,15 @@ impl Network { let node = &self.graph[*idx]; if let Some(whatami) = node.whatami { if zenoh_runtime::ZRuntime::Net - .block_in_place(self.runtime.manager().get_transport_unicast(&node.zid)) + .block_in_place(strong_runtime.manager().get_transport_unicast(&node.zid)) .is_none() && self.autoconnect.matches(whatami) { if let Some(locators) = &node.locators { - let runtime = self.runtime.clone(); + let runtime = strong_runtime.clone(); let zid = node.zid; let locators = locators.clone(); - self.runtime.spawn(async move { + strong_runtime.spawn(async move { // random backoff tokio::time::sleep(std::time::Duration::from_millis( rand::random::() % 100, diff --git a/zenoh/src/net/routing/hat/p2p_peer/gossip.rs b/zenoh/src/net/routing/hat/p2p_peer/gossip.rs index 247412bfdf..f651ccdc0d 100644 --- a/zenoh/src/net/routing/hat/p2p_peer/gossip.rs +++ b/zenoh/src/net/routing/hat/p2p_peer/gossip.rs @@ -14,6 +14,7 @@ use crate::net::codec::Zenoh080Routing; use crate::net::protocol::linkstate::{LinkState, LinkStateList}; use crate::net::runtime::Runtime; +use crate::runtime::WeakRuntime; use petgraph::graph::NodeIndex; use std::convert::TryInto; use vec_map::VecMap; @@ -93,7 +94,7 @@ pub(super) struct Network { pub(super) idx: NodeIndex, pub(super) links: VecMap, pub(super) graph: petgraph::stable_graph::StableUnGraph, - pub(super) runtime: Runtime, + pub(super) runtime: WeakRuntime, } impl Network { @@ -124,7 +125,7 @@ impl Network { idx, links: VecMap::new(), graph, - runtime, + runtime: Runtime::downgrade(&runtime), } } @@ -191,7 +192,7 @@ impl Network { whatami: self.graph[idx].whatami, locators: if details.locators { if idx == self.idx { - Some(self.runtime.get_locators()) + Some(self.runtime.upgrade().unwrap().get_locators()) } else { self.graph[idx].locators.clone() } @@ -266,6 +267,7 @@ impl Network { pub(super) fn link_states(&mut self, link_states: Vec, src: ZenohId) { log::trace!("{} Received from {} raw: {:?}", self.name, src, link_states); + let strong_runtime = self.runtime.upgrade().unwrap(); let graph = &self.graph; let links = &mut self.links; @@ -407,13 +409,13 @@ impl Network { if !self.autoconnect.is_empty() { // Connect discovered peers if zenoh_runtime::ZRuntime::Net - .block_in_place(self.runtime.manager().get_transport_unicast(&zid)) + .block_in_place(strong_runtime.manager().get_transport_unicast(&zid)) .is_none() && self.autoconnect.matches(whatami) { if let Some(locators) = locators { - let runtime = self.runtime.clone(); - self.runtime.spawn(async move { + let runtime = strong_runtime.clone(); + strong_runtime.spawn(async move { // random backoff tokio::time::sleep(std::time::Duration::from_millis( rand::random::() % 100, diff --git a/zenoh/src/net/routing/router.rs b/zenoh/src/net/routing/router.rs index d67a2baa9d..c80d3bdc09 100644 --- a/zenoh/src/net/routing/router.rs +++ b/zenoh/src/net/routing/router.rs @@ -155,7 +155,7 @@ impl Router { state: newface, }; - let _ = mux.face.set(face.clone()); + let _ = mux.face.set(Face::downgrade(&face)); ctrl_lock.new_transport_unicast_face(&mut tables, &self.tables, &mut face, &transport)?; diff --git a/zenoh/src/net/runtime/mod.rs b/zenoh/src/net/runtime/mod.rs index 282c45f66c..8351c8f352 100644 --- a/zenoh/src/net/runtime/mod.rs +++ b/zenoh/src/net/runtime/mod.rs @@ -29,7 +29,7 @@ pub use adminspace::AdminSpace; use futures::stream::StreamExt; use futures::Future; use std::any::Any; -use std::sync::Arc; +use std::sync::{Arc, Weak}; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use uhlc::{HLCBuilder, HLC}; @@ -57,6 +57,16 @@ struct RuntimeState { token: CancellationToken, } +pub struct WeakRuntime { + state: Weak, +} + +impl WeakRuntime { + pub fn upgrade(&self) -> Option { + self.state.upgrade().map(|state| Runtime { state }) + } +} + #[derive(Clone)] pub struct Runtime { state: Arc, @@ -97,7 +107,7 @@ impl Runtime { let router = Arc::new(Router::new(zid, whatami, hlc.clone(), &config)?); let handler = Arc::new(RuntimeTransportEventHandler { - runtime: std::sync::RwLock::new(None), + runtime: std::sync::RwLock::new(WeakRuntime { state: Weak::new() }), }); let transport_manager = TransportManager::builder() @@ -123,7 +133,7 @@ impl Runtime { token: CancellationToken::new(), }), }; - *handler.runtime.write().unwrap() = Some(runtime.clone()); + *handler.runtime.write().unwrap() = Runtime::downgrade(&runtime); get_mut_unchecked(&mut runtime.state.router.clone()).init_link_state(runtime.clone()); let receiver = config.subscribe(); @@ -158,6 +168,8 @@ impl Runtime { // TODO: Check this whether is able to terminate all spawned task by Runtime::spawn self.state.token.cancel(); self.manager().close().await; + // clean up to break cyclic reference of self.state to itself + self.state.transport_handlers.write().unwrap().clear(); Ok(()) } @@ -202,10 +214,16 @@ impl Runtime { pub fn whatami(&self) -> WhatAmI { self.state.whatami } + + pub fn downgrade(this: &Runtime) -> WeakRuntime { + WeakRuntime { + state: Arc::downgrade(&this.state), + } + } } struct RuntimeTransportEventHandler { - runtime: std::sync::RwLock>, + runtime: std::sync::RwLock, } impl TransportEventHandler for RuntimeTransportEventHandler { @@ -214,7 +232,7 @@ impl TransportEventHandler for RuntimeTransportEventHandler { peer: TransportPeer, transport: TransportUnicast, ) -> ZResult> { - match zread!(self.runtime).as_ref() { + match zread!(self.runtime).upgrade().as_ref() { Some(runtime) => { let slave_handlers: Vec> = zread!(runtime.state.transport_handlers) @@ -242,7 +260,7 @@ impl TransportEventHandler for RuntimeTransportEventHandler { &self, transport: TransportMulticast, ) -> ZResult> { - match zread!(self.runtime).as_ref() { + match zread!(self.runtime).upgrade().as_ref() { Some(runtime) => { let slave_handlers: Vec> = zread!(runtime.state.transport_handlers) diff --git a/zenoh/src/session.rs b/zenoh/src/session.rs index 7290d0aeac..4f31c32e8a 100644 --- a/zenoh/src/session.rs +++ b/zenoh/src/session.rs @@ -520,8 +520,11 @@ impl Session { trace!("close()"); self.runtime.close().await?; - let primitives = zwrite!(self.state).primitives.as_ref().unwrap().clone(); - primitives.send_close(); + let mut state = zwrite!(self.state); + state.primitives.as_ref().unwrap().send_close(); + // clean up to break cyclic references from self.state to itself + state.primitives.take(); + state.queryables.clear(); Ok(()) }) From 301ab460174598e16e22534ddbc83741307bb2de Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Fri, 15 Mar 2024 15:50:34 +0000 Subject: [PATCH 02/38] terminate tasks to prevent corresponding memory leaks --- Cargo.toml | 2 + commons/zenoh-task/Cargo.toml | 31 +++++ commons/zenoh-task/src/lib.rs | 103 ++++++++++++++ io/zenoh-transport/Cargo.toml | 1 + io/zenoh-transport/src/manager.rs | 38 ++---- zenoh/Cargo.toml | 1 + zenoh/src/net/primitives/mux.rs | 12 +- zenoh/src/net/routing/dispatcher/face.rs | 4 +- zenoh/src/net/routing/dispatcher/resource.rs | 2 +- .../net/routing/hat/linkstate_peer/network.rs | 4 +- zenoh/src/session.rs | 126 ++++++++++-------- 11 files changed, 232 insertions(+), 92 deletions(-) create mode 100644 commons/zenoh-task/Cargo.toml create mode 100644 commons/zenoh-task/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index d9be6e3685..b069989d68 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ members = [ "commons/zenoh-result", "commons/zenoh-shm", "commons/zenoh-sync", + "commons/zenoh-task", "commons/zenoh-util", "commons/zenoh-runtime", "examples", @@ -194,6 +195,7 @@ zenoh-link = { version = "0.11.0-dev", path = "io/zenoh-link" } zenoh-link-commons = { version = "0.11.0-dev", path = "io/zenoh-link-commons" } zenoh = { version = "0.11.0-dev", path = "zenoh", default-features = false } zenoh-runtime = { version = "0.11.0-dev", path = "commons/zenoh-runtime" } +zenoh-task = { version = "0.11.0-dev", path = "commons/zenoh-task" } [profile.dev] debug = true diff --git a/commons/zenoh-task/Cargo.toml b/commons/zenoh-task/Cargo.toml new file mode 100644 index 0000000000..c0f7ec0403 --- /dev/null +++ b/commons/zenoh-task/Cargo.toml @@ -0,0 +1,31 @@ +# +# Copyright (c) 2024 ZettaScale Technology +# +# This program and the accompanying materials are made available under the +# terms of the Eclipse Public License 2.0 which is available at +# http://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0 +# which is available at https://www.apache.org/licenses/LICENSE-2.0. +# +# SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 +# +# Contributors: +# ZettaScale Zenoh Team, +# +[package] +rust-version = { workspace = true } +name = "zenoh-task" +version = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +authors = {workspace = true } +edition = { workspace = true } +license = { workspace = true } +categories = { workspace = true } +description = "Internal crate for zenoh." +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +tokio = { workspace = true, features = ["default", "sync"] } +futures = { workspace = true } +zenoh-runtime = { workspace = true } +rand = { workspace = true } \ No newline at end of file diff --git a/commons/zenoh-task/src/lib.rs b/commons/zenoh-task/src/lib.rs new file mode 100644 index 0000000000..727b894ede --- /dev/null +++ b/commons/zenoh-task/src/lib.rs @@ -0,0 +1,103 @@ +// +// Copyright (c) 2024 ZettaScale Technology +// +// This program and the accompanying materials are made available under the +// terms of the Eclipse Public License 2.0 which is available at +// http://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0 +// which is available at https://www.apache.org/licenses/LICENSE-2.0. +// +// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 +// +// Contributors: +// ZettaScale Zenoh Team, +// + +//! ⚠️ WARNING ⚠️ +//! +//! This module is intended for Zenoh's internal use. +//! +//! [Click here for Zenoh's documentation](../zenoh/index.html) + +use rand::random; +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::future::Future; +use std::ops::DerefMut; +use std::sync::Arc; +use std::sync::Mutex; +use tokio::task::{self, JoinHandle}; +use zenoh_runtime::ZRuntime; + +#[derive(Clone)] +pub struct TaskController { + running_task_id_to_handle: Arc>>>>, +} + +impl Default for TaskController { + fn default() -> Self { + TaskController { + running_task_id_to_handle: Arc::new(Mutex::new( + HashMap::>>::new(), + )), + } + } +} + +impl TaskController { + /// Spawns a task (similarly to task::spawn) that can be later terminated by call to terminate_all() + /// Task output is ignored + pub fn spawn(&self, future: F) + where + F: Future + Send + 'static, + T: Send + 'static, + { + let mut tasks = self.running_task_id_to_handle.lock().unwrap(); + let id = TaskController::get_next_task_id(tasks.deref_mut()); + let tasks_mutex = self.running_task_id_to_handle.clone(); + let jh = task::spawn(futures::FutureExt::map(future, move |_| { + tasks_mutex.lock().unwrap().remove(&id); + })); + tasks.insert(id, Some(jh)); + } + + /// Spawns a task using a specified runtime (similarly to Runtime::spawn) that can be later terminated by call to terminate_all() + /// Task output is ignored + pub fn spawn_with_rt(&self, rt: ZRuntime, future: F) + where + F: Future + Send + 'static, + T: Send + 'static, + { + let mut tasks = self.running_task_id_to_handle.lock().unwrap(); + let id = TaskController::get_next_task_id(tasks.deref_mut()); + let tasks_mutex = self.running_task_id_to_handle.clone(); + let jh = rt.spawn(futures::FutureExt::map(future, move |_| { + tasks_mutex.lock().unwrap().remove(&id); + })); + tasks.insert(id, Some(jh)); + } + + fn get_next_task_id(hm: &mut HashMap>>) -> u64 { + loop { + let id = random::(); + match hm.entry(id) { + Entry::Occupied(_) => { + continue; + } + Entry::Vacant(v) => { + v.insert(None); + return id; + } + } + } + } + + /// Terminates all prevously spawned tasks + pub fn terminate_all(&self) { + let mut tasks_lock = self.running_task_id_to_handle.lock().unwrap(); + + let tasks: Vec<(u64, Option>)> = tasks_lock.drain().collect(); + for (_id, jh) in tasks { + jh.unwrap().abort(); + } + } +} diff --git a/io/zenoh-transport/Cargo.toml b/io/zenoh-transport/Cargo.toml index 4cb51fc504..e7a2e6e5f0 100644 --- a/io/zenoh-transport/Cargo.toml +++ b/io/zenoh-transport/Cargo.toml @@ -82,6 +82,7 @@ zenoh-shm = { workspace = true, optional = true } zenoh-sync = { workspace = true } zenoh-util = { workspace = true } zenoh-runtime = { workspace = true } +zenoh-task = { workspace = true } diff --git a/io/zenoh-transport/src/manager.rs b/io/zenoh-transport/src/manager.rs index 9bf40e5fd3..f28509fc27 100644 --- a/io/zenoh-transport/src/manager.rs +++ b/io/zenoh-transport/src/manager.rs @@ -24,7 +24,6 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; use tokio::sync::Mutex as AsyncMutex; -use tokio_util::sync::CancellationToken; use zenoh_config::{Config, LinkRxConf, QueueConf, QueueSizeConf}; use zenoh_crypto::{BlockCipher, PseudoRng}; use zenoh_link::NewLinkChannelSender; @@ -34,6 +33,7 @@ use zenoh_protocol::{ VERSION, }; use zenoh_result::{bail, ZResult}; +use zenoh_task::TaskController; /// # Examples /// ``` @@ -321,7 +321,7 @@ pub struct TransportManager { pub(crate) new_unicast_link_sender: NewLinkChannelSender, #[cfg(feature = "stats")] pub(crate) stats: Arc, - pub(crate) token: CancellationToken, + task_controller: TaskController, } impl TransportManager { @@ -343,32 +343,21 @@ impl TransportManager { new_unicast_link_sender, #[cfg(feature = "stats")] stats: std::sync::Arc::new(crate::stats::TransportStats::default()), - token: CancellationToken::new(), + task_controller: TaskController::default(), }; // @TODO: this should be moved into the unicast module - zenoh_runtime::ZRuntime::Net.spawn({ - let this = this.clone(); - let token = this.token.clone(); - async move { - // while let Ok(link) = new_unicast_link_receiver.recv_async().await { - // this.handle_new_link_unicast(link).await; - // } - loop { - tokio::select! { - res = new_unicast_link_receiver.recv_async() => { - if let Ok(link) = res { - this.handle_new_link_unicast(link).await; - } - } - - _ = token.cancelled() => { - break; + this.task_controller + .spawn_with_rt(zenoh_runtime::ZRuntime::Net, { + let this = this.clone(); + async move { + loop { + if let Ok(link) = new_unicast_link_receiver.recv_async().await { + this.handle_new_link_unicast(link).await; } } } - } - }); + }); this } @@ -388,10 +377,7 @@ impl TransportManager { pub async fn close(&self) { self.close_unicast().await; - // TODO: Check this - self.token.cancel(); - // WARN: depends on the auto-close of tokio runtime after dropped - // self.tx_executor.runtime.shutdown_background(); + self.task_controller.terminate_all(); } /*************************************/ diff --git a/zenoh/Cargo.toml b/zenoh/Cargo.toml index 0e28905253..8cd4b9fa19 100644 --- a/zenoh/Cargo.toml +++ b/zenoh/Cargo.toml @@ -105,6 +105,7 @@ zenoh-sync = { workspace = true } zenoh-transport = { workspace = true } zenoh-util = { workspace = true } zenoh-runtime = { workspace = true } +zenoh-task = { workspace = true } [build-dependencies] rustc_version = { workspace = true } diff --git a/zenoh/src/net/primitives/mux.rs b/zenoh/src/net/primitives/mux.rs index 0aad0d8584..5c473e8ad8 100644 --- a/zenoh/src/net/primitives/mux.rs +++ b/zenoh/src/net/primitives/mux.rs @@ -48,7 +48,7 @@ impl Primitives for Mux { }; if self.interceptor.interceptors.is_empty() { let _ = self.handler.schedule(msg); - } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { + } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { let ctx = RoutingContext::new_out(msg, face.clone()); let prefix = ctx .wire_expr() @@ -72,7 +72,7 @@ impl Primitives for Mux { }; if self.interceptor.interceptors.is_empty() { let _ = self.handler.schedule(msg); - } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { + } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { let ctx = RoutingContext::new_out(msg, face.clone()); let prefix = ctx .wire_expr() @@ -96,7 +96,7 @@ impl Primitives for Mux { }; if self.interceptor.interceptors.is_empty() { let _ = self.handler.schedule(msg); - } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { + } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { let ctx = RoutingContext::new_out(msg, face.clone()); let prefix = ctx .wire_expr() @@ -120,7 +120,7 @@ impl Primitives for Mux { }; if self.interceptor.interceptors.is_empty() { let _ = self.handler.schedule(msg); - } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { + } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { let ctx = RoutingContext::new_out(msg, face.clone()); let prefix = ctx .wire_expr() @@ -144,7 +144,7 @@ impl Primitives for Mux { }; if self.interceptor.interceptors.is_empty() { let _ = self.handler.schedule(msg); - } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { + } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { let ctx = RoutingContext::new_out(msg, face.clone()); let prefix = ctx .wire_expr() @@ -199,7 +199,7 @@ impl EPrimitives for Mux { }; if self.interceptor.interceptors.is_empty() { let _ = self.handler.schedule(msg); - } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { + } else if let Some(face) = self.face.get().and_then(|f| f.upgrade()) { let ctx = RoutingContext::new_out(msg, face.clone()); let prefix = ctx .wire_expr() diff --git a/zenoh/src/net/routing/dispatcher/face.rs b/zenoh/src/net/routing/dispatcher/face.rs index 9be11ad7d1..94f66ded8d 100644 --- a/zenoh/src/net/routing/dispatcher/face.rs +++ b/zenoh/src/net/routing/dispatcher/face.rs @@ -160,7 +160,7 @@ impl WeakFace { pub fn upgrade(&self) -> Option { Some(Face { tables: self.tables.upgrade()?, - state: self.state.upgrade()? + state: self.state.upgrade()?, }) } } @@ -175,7 +175,7 @@ impl Face { pub fn downgrade(&self) -> WeakFace { WeakFace { tables: Arc::downgrade(&self.tables), - state: Arc::downgrade(&self.state) + state: Arc::downgrade(&self.state), } } } diff --git a/zenoh/src/net/routing/dispatcher/resource.rs b/zenoh/src/net/routing/dispatcher/resource.rs index c1a2dc2e2a..1d9022bae1 100644 --- a/zenoh/src/net/routing/dispatcher/resource.rs +++ b/zenoh/src/net/routing/dispatcher/resource.rs @@ -292,7 +292,7 @@ impl Resource { let mut resclone = res.clone(); let mutres = get_mut_unchecked(&mut resclone); if let Some(ref mut parent) = mutres.parent { - if Arc::strong_count(res) <= 3 && res.childs.is_empty() { + if Arc::strong_count(res) <= 3 && res.childs.is_empty() { // consider only childless resource held by only one external object (+ 1 strong count for resclone, + 1 strong count for res.parent to a total of 3 ) log::debug!("Unregister resource {}", res.expr()); if let Some(context) = mutres.context.as_mut() { diff --git a/zenoh/src/net/routing/hat/linkstate_peer/network.rs b/zenoh/src/net/routing/hat/linkstate_peer/network.rs index 8e2d64c014..4d3497c861 100644 --- a/zenoh/src/net/routing/hat/linkstate_peer/network.rs +++ b/zenoh/src/net/routing/hat/linkstate_peer/network.rs @@ -489,7 +489,9 @@ impl Network { if !self.autoconnect.is_empty() { // Connect discovered peers if zenoh_runtime::ZRuntime::Net - .block_in_place(strong_runtime.manager().get_transport_unicast(&zid)) + .block_in_place( + strong_runtime.manager().get_transport_unicast(&zid), + ) .is_none() && self.autoconnect.matches(whatami) { diff --git a/zenoh/src/session.rs b/zenoh/src/session.rs index 4f31c32e8a..aef3be8737 100644 --- a/zenoh/src/session.rs +++ b/zenoh/src/session.rs @@ -81,6 +81,7 @@ use zenoh_protocol::{ }, }; use zenoh_result::ZResult; +use zenoh_task::TaskController; use zenoh_util::core::AsyncResolve; zconfigurable! { @@ -392,6 +393,7 @@ pub struct Session { pub(crate) state: Arc>, pub(crate) id: u16, pub(crate) alive: bool, + task_controller: TaskController, } static SESSION_ID_COUNTER: AtomicU16 = AtomicU16::new(0); @@ -412,6 +414,7 @@ impl Session { state: state.clone(), id: SESSION_ID_COUNTER.fetch_add(1, Ordering::SeqCst), alive: true, + task_controller: TaskController::default(), }; runtime.new_handler(Arc::new(admin::Handler::new(session.clone()))); @@ -518,6 +521,7 @@ impl Session { pub fn close(self) -> impl Resolve> { ResolveFuture::new(async move { trace!("close()"); + self.task_controller.terminate_all(); self.runtime.close().await?; let mut state = zwrite!(self.state); @@ -806,6 +810,7 @@ impl Session { state: self.state.clone(), id: self.id, alive: false, + task_controller: self.task_controller.clone(), } } @@ -1502,30 +1507,34 @@ impl Session { if key_expr.intersects(&msub.key_expr) { // Cannot hold session lock when calling tables (matching_status()) // TODO: check which ZRuntime should be used - zenoh_runtime::ZRuntime::RX.spawn({ - let session = self.clone(); - let msub = msub.clone(); - async move { - match msub.current.lock() { - Ok(mut current) => { - if !*current { - if let Ok(status) = - session.matching_status(&msub.key_expr, msub.destination) - { - if status.matching_subscribers() { - *current = true; - let callback = msub.callback.clone(); - (callback)(status) + self.task_controller + .spawn_with_rt(zenoh_runtime::ZRuntime::RX, { + let session = self.clone(); + let msub = msub.clone(); + async move { + match msub.current.lock() { + Ok(mut current) => { + if !*current { + if let Ok(status) = session + .matching_status(&msub.key_expr, msub.destination) + { + if status.matching_subscribers() { + *current = true; + let callback = msub.callback.clone(); + (callback)(status) + } } } } - } - Err(e) => { - log::error!("Error trying to acquire MathginListener lock: {}", e); + Err(e) => { + log::error!( + "Error trying to acquire MathginListener lock: {}", + e + ); + } } } - } - }); + }); } } } @@ -1536,30 +1545,34 @@ impl Session { if key_expr.intersects(&msub.key_expr) { // Cannot hold session lock when calling tables (matching_status()) // TODO: check which ZRuntime should be used - zenoh_runtime::ZRuntime::RX.spawn({ - let session = self.clone(); - let msub = msub.clone(); - async move { - match msub.current.lock() { - Ok(mut current) => { - if *current { - if let Ok(status) = - session.matching_status(&msub.key_expr, msub.destination) - { - if !status.matching_subscribers() { - *current = false; - let callback = msub.callback.clone(); - (callback)(status) + self.task_controller + .spawn_with_rt(zenoh_runtime::ZRuntime::RX, { + let session = self.clone(); + let msub = msub.clone(); + async move { + match msub.current.lock() { + Ok(mut current) => { + if *current { + if let Ok(status) = session + .matching_status(&msub.key_expr, msub.destination) + { + if !status.matching_subscribers() { + *current = false; + let callback = msub.callback.clone(); + (callback)(status) + } } } } - } - Err(e) => { - log::error!("Error trying to acquire MathginListener lock: {}", e); + Err(e) => { + log::error!( + "Error trying to acquire MathginListener lock: {}", + e + ); + } } } - } - }); + }); } } } @@ -1755,27 +1768,28 @@ impl Session { _ => 1, }; - zenoh_runtime::ZRuntime::Net.spawn({ - let state = self.state.clone(); - let zid = self.runtime.zid(); - async move { - tokio::time::sleep(timeout).await; - let mut state = zwrite!(state); - if let Some(query) = state.queries.remove(&qid) { - std::mem::drop(state); - log::debug!("Timeout on query {}! Send error and close.", qid); - if query.reception_mode == ConsolidationMode::Latest { - for (_, reply) in query.replies.unwrap().into_iter() { - (query.callback)(reply); + self.task_controller + .spawn_with_rt(zenoh_runtime::ZRuntime::Net, { + let state = self.state.clone(); + let zid = self.runtime.zid(); + async move { + tokio::time::sleep(timeout).await; + let mut state = zwrite!(state); + if let Some(query) = state.queries.remove(&qid) { + std::mem::drop(state); + log::debug!("Timeout on query {}! Send error and close.", qid); + if query.reception_mode == ConsolidationMode::Latest { + for (_, reply) in query.replies.unwrap().into_iter() { + (query.callback)(reply); + } } + (query.callback)(Reply { + sample: Err("Timeout".into()), + replier_id: zid, + }); } - (query.callback)(Reply { - sample: Err("Timeout".into()), - replier_id: zid, - }); } - } - }); + }); let selector = match scope { Some(scope) => Selector { From 8b8121032b51fbc5cf6312b384bcf3a614da5d67 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Fri, 15 Mar 2024 17:05:04 +0000 Subject: [PATCH 03/38] fix infinite recursion if session.close() is called explicitly --- zenoh/src/session.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zenoh/src/session.rs b/zenoh/src/session.rs index aef3be8737..0456a070cb 100644 --- a/zenoh/src/session.rs +++ b/zenoh/src/session.rs @@ -518,7 +518,7 @@ impl Session { /// session.close().res().await.unwrap(); /// # } /// ``` - pub fn close(self) -> impl Resolve> { + pub fn close(mut self) -> impl Resolve> { ResolveFuture::new(async move { trace!("close()"); self.task_controller.terminate_all(); @@ -529,7 +529,7 @@ impl Session { // clean up to break cyclic references from self.state to itself state.primitives.take(); state.queryables.clear(); - + self.alive = false; Ok(()) }) } From 2e7c9110722952d795b096195e0c4f1f361a507c Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Mon, 18 Mar 2024 14:59:26 +0000 Subject: [PATCH 04/38] add function to force-drop ZRUNTIME_POOL static variable --- commons/zenoh-runtime/src/lib.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/commons/zenoh-runtime/src/lib.rs b/commons/zenoh-runtime/src/lib.rs index fb2186ecb4..114b3d2ac4 100644 --- a/commons/zenoh-runtime/src/lib.rs +++ b/commons/zenoh-runtime/src/lib.rs @@ -142,6 +142,25 @@ impl ZRuntimePool { } } +/// Force drops ZRUNTIME_POOL. +/// +/// Rust does not drop static variables. They are always reported by valgrind for exampler. +/// This function can be used force drop ZRUNTIME_POOL, to prevent valgrind reporting memory leaks related to it. +#[doc(hidden)] +pub unsafe fn zruntime_pool_free() { + let hm = &ZRUNTIME_POOL.0 as *const HashMap> + as *mut HashMap>; + for (_k, v) in hm.as_mut().unwrap().drain() { + if v.get().is_some() { + let r = v.get().unwrap(); + let r_mut = (r as *const Runtime) as *mut Runtime; + r_mut.read().shutdown_background(); + std::mem::forget(v); + } + } + std::mem::drop(hm.read()); +} + #[derive(Debug, Copy, Clone)] pub struct ZRuntimeConfig { pub application_threads: usize, From adb7da7e5113001c3afe629b49da06a4b8d7eb85 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Mon, 18 Mar 2024 17:59:25 +0000 Subject: [PATCH 05/38] make TaskController copyable --- commons/zenoh-task/src/lib.rs | 41 ++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/commons/zenoh-task/src/lib.rs b/commons/zenoh-task/src/lib.rs index 727b894ede..5432d3ea52 100644 --- a/commons/zenoh-task/src/lib.rs +++ b/commons/zenoh-task/src/lib.rs @@ -28,17 +28,37 @@ use std::sync::Mutex; use tokio::task::{self, JoinHandle}; use zenoh_runtime::ZRuntime; + + +struct TaskControllerInner (Mutex>>>); + +impl TaskControllerInner { + fn terminate_all(&self) { + let mut tasks_lock = self.0.lock().unwrap(); + + let tasks: Vec<(u64, Option>)> = tasks_lock.drain().collect(); + for (_id, jh) in tasks { + jh.unwrap().abort(); + } + } +} + +impl Drop for TaskControllerInner { + fn drop(&mut self) { + self.terminate_all() + } +} #[derive(Clone)] pub struct TaskController { - running_task_id_to_handle: Arc>>>>, + running_task_id_to_handle: Arc, } impl Default for TaskController { fn default() -> Self { TaskController { - running_task_id_to_handle: Arc::new(Mutex::new( + running_task_id_to_handle: Arc::new(TaskControllerInner(Mutex::new( HashMap::>>::new(), - )), + ))), } } } @@ -51,11 +71,11 @@ impl TaskController { F: Future + Send + 'static, T: Send + 'static, { - let mut tasks = self.running_task_id_to_handle.lock().unwrap(); + let mut tasks = self.running_task_id_to_handle.0.lock().unwrap(); let id = TaskController::get_next_task_id(tasks.deref_mut()); let tasks_mutex = self.running_task_id_to_handle.clone(); let jh = task::spawn(futures::FutureExt::map(future, move |_| { - tasks_mutex.lock().unwrap().remove(&id); + tasks_mutex.0.lock().unwrap().remove(&id); })); tasks.insert(id, Some(jh)); } @@ -67,11 +87,11 @@ impl TaskController { F: Future + Send + 'static, T: Send + 'static, { - let mut tasks = self.running_task_id_to_handle.lock().unwrap(); + let mut tasks = self.running_task_id_to_handle.0.lock().unwrap(); let id = TaskController::get_next_task_id(tasks.deref_mut()); let tasks_mutex = self.running_task_id_to_handle.clone(); let jh = rt.spawn(futures::FutureExt::map(future, move |_| { - tasks_mutex.lock().unwrap().remove(&id); + tasks_mutex.0.lock().unwrap().remove(&id); })); tasks.insert(id, Some(jh)); } @@ -93,11 +113,6 @@ impl TaskController { /// Terminates all prevously spawned tasks pub fn terminate_all(&self) { - let mut tasks_lock = self.running_task_id_to_handle.lock().unwrap(); - - let tasks: Vec<(u64, Option>)> = tasks_lock.drain().collect(); - for (_id, jh) in tasks { - jh.unwrap().abort(); - } + self.running_task_id_to_handle.terminate_all(); } } From 44dfef40f28f6cfcc63e57905e1702f1d32a2177 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Tue, 19 Mar 2024 10:19:00 +0000 Subject: [PATCH 06/38] make TaskController use tokio_util TaskTracker and CancellationTokens --- commons/zenoh-task/Cargo.toml | 2 +- commons/zenoh-task/src/lib.rs | 112 ++++++++++++++++------------------ 2 files changed, 53 insertions(+), 61 deletions(-) diff --git a/commons/zenoh-task/Cargo.toml b/commons/zenoh-task/Cargo.toml index c0f7ec0403..a4b7ad21f8 100644 --- a/commons/zenoh-task/Cargo.toml +++ b/commons/zenoh-task/Cargo.toml @@ -28,4 +28,4 @@ description = "Internal crate for zenoh." tokio = { workspace = true, features = ["default", "sync"] } futures = { workspace = true } zenoh-runtime = { workspace = true } -rand = { workspace = true } \ No newline at end of file +tokio-util = { workspace = true, features = ["rt"] } \ No newline at end of file diff --git a/commons/zenoh-task/src/lib.rs b/commons/zenoh-task/src/lib.rs index 5432d3ea52..fcb1e1452a 100644 --- a/commons/zenoh-task/src/lib.rs +++ b/commons/zenoh-task/src/lib.rs @@ -18,47 +18,23 @@ //! //! [Click here for Zenoh's documentation](../zenoh/index.html) -use rand::random; -use std::collections::hash_map::Entry; -use std::collections::HashMap; use std::future::Future; -use std::ops::DerefMut; -use std::sync::Arc; -use std::sync::Mutex; -use tokio::task::{self, JoinHandle}; use zenoh_runtime::ZRuntime; +use tokio_util::sync::CancellationToken; +use tokio_util::task::TaskTracker; - -struct TaskControllerInner (Mutex>>>); - -impl TaskControllerInner { - fn terminate_all(&self) { - let mut tasks_lock = self.0.lock().unwrap(); - - let tasks: Vec<(u64, Option>)> = tasks_lock.drain().collect(); - for (_id, jh) in tasks { - jh.unwrap().abort(); - } - } -} - -impl Drop for TaskControllerInner { - fn drop(&mut self) { - self.terminate_all() - } -} #[derive(Clone)] pub struct TaskController { - running_task_id_to_handle: Arc, + tracker: TaskTracker, + token: CancellationToken, } impl Default for TaskController { fn default() -> Self { TaskController { - running_task_id_to_handle: Arc::new(TaskControllerInner(Mutex::new( - HashMap::>>::new(), - ))), + tracker: TaskTracker::new(), + token: CancellationToken::new(), } } } @@ -71,48 +47,64 @@ impl TaskController { F: Future + Send + 'static, T: Send + 'static, { - let mut tasks = self.running_task_id_to_handle.0.lock().unwrap(); - let id = TaskController::get_next_task_id(tasks.deref_mut()); - let tasks_mutex = self.running_task_id_to_handle.clone(); - let jh = task::spawn(futures::FutureExt::map(future, move |_| { - tasks_mutex.0.lock().unwrap().remove(&id); - })); - tasks.insert(id, Some(jh)); + let token = self.token.child_token(); + let task = async move { + tokio::select! { + _ = token.cancelled() => {}, + _ = future => {} + } + }; + self.tracker.spawn(task); } - /// Spawns a task using a specified runtime (similarly to Runtime::spawn) that can be later terminated by call to terminate_all() - /// Task output is ignored + /// Spawns a task using a specified runtime (similarly to Runtime::spawn) that can be later terminated by call to terminate_all(). + /// Task output is ignored. pub fn spawn_with_rt(&self, rt: ZRuntime, future: F) where F: Future + Send + 'static, T: Send + 'static, { - let mut tasks = self.running_task_id_to_handle.0.lock().unwrap(); - let id = TaskController::get_next_task_id(tasks.deref_mut()); - let tasks_mutex = self.running_task_id_to_handle.clone(); - let jh = rt.spawn(futures::FutureExt::map(future, move |_| { - tasks_mutex.0.lock().unwrap().remove(&id); - })); - tasks.insert(id, Some(jh)); + let token = self.token.child_token(); + let task = async move { + tokio::select! { + _ = token.cancelled() => {}, + _ = future => {} + } + }; + self.tracker.spawn_on(task, &rt); } - fn get_next_task_id(hm: &mut HashMap>>) -> u64 { - loop { - let id = random::(); - match hm.entry(id) { - Entry::Occupied(_) => { - continue; - } - Entry::Vacant(v) => { - v.insert(None); - return id; - } - } - } + pub fn get_cancellation_token(&self) -> CancellationToken { + self.token.child_token() + } + + /// Spawns a task that can be cancelled via cancelling a token obtained by get_cancellation_token(). + /// It can be later terminated by call to terminate_all(). + /// Task output is ignored. + pub fn spawn_cancellable(&self, future: F) + where + F: Future + Send + 'static, + T: Send + 'static, + { + self.tracker.spawn(future); + } + + /// Spawns a task that can be cancelled via cancelling a token obtained by get_cancellation_token() using a specified runtime. + /// It can be later terminated by call to terminate_all(). + /// Task output is ignored. + pub fn spawn_cancellable_with_rt(&self, rt: ZRuntime, future: F) + where + F: Future + Send + 'static, + T: Send + 'static, + { + self.tracker.spawn_on(future, &rt); } /// Terminates all prevously spawned tasks pub fn terminate_all(&self) { - self.running_task_id_to_handle.terminate_all(); + self.tracker.close(); + self.token.cancel(); + let tracker = self.tracker.clone(); + futures::executor::block_on( async move { tracker.wait().await } ); } } From e12eedfd362da535151ae49c3a62887afa0980cc Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Tue, 19 Mar 2024 14:50:01 +0000 Subject: [PATCH 07/38] terminate more tasks --- commons/zenoh-runtime/src/lib.rs | 13 +++++++- commons/zenoh-task/src/lib.rs | 31 +++++++++++-------- io/zenoh-transport/src/manager.rs | 2 +- io/zenoh-transport/src/multicast/transport.rs | 15 +++++---- io/zenoh-transport/src/unicast/manager.rs | 23 +++++++------- zenoh/src/net/runtime/mod.rs | 18 +++++------ 6 files changed, 57 insertions(+), 45 deletions(-) diff --git a/commons/zenoh-runtime/src/lib.rs b/commons/zenoh-runtime/src/lib.rs index 114b3d2ac4..8f11935bbe 100644 --- a/commons/zenoh-runtime/src/lib.rs +++ b/commons/zenoh-runtime/src/lib.rs @@ -143,7 +143,7 @@ impl ZRuntimePool { } /// Force drops ZRUNTIME_POOL. -/// +/// /// Rust does not drop static variables. They are always reported by valgrind for exampler. /// This function can be used force drop ZRUNTIME_POOL, to prevent valgrind reporting memory leaks related to it. #[doc(hidden)] @@ -161,6 +161,17 @@ pub unsafe fn zruntime_pool_free() { std::mem::drop(hm.read()); } +#[doc(hidden)] +pub struct ZRuntimeGuard; + +impl Drop for ZRuntimeGuard { + fn drop(&mut self) { + unsafe { + zruntime_pool_free(); + } + } +} + #[derive(Debug, Copy, Clone)] pub struct ZRuntimeConfig { pub application_threads: usize, diff --git a/commons/zenoh-task/src/lib.rs b/commons/zenoh-task/src/lib.rs index fcb1e1452a..3d665f3035 100644 --- a/commons/zenoh-task/src/lib.rs +++ b/commons/zenoh-task/src/lib.rs @@ -19,10 +19,10 @@ //! [Click here for Zenoh's documentation](../zenoh/index.html) use std::future::Future; -use zenoh_runtime::ZRuntime; +use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tokio_util::task::TaskTracker; - +use zenoh_runtime::ZRuntime; #[derive(Clone)] pub struct TaskController { @@ -42,7 +42,7 @@ impl Default for TaskController { impl TaskController { /// Spawns a task (similarly to task::spawn) that can be later terminated by call to terminate_all() /// Task output is ignored - pub fn spawn(&self, future: F) + pub fn spawn(&self, future: F) -> JoinHandle<()> where F: Future + Send + 'static, T: Send + 'static, @@ -54,12 +54,12 @@ impl TaskController { _ = future => {} } }; - self.tracker.spawn(task); + self.tracker.spawn(task) } /// Spawns a task using a specified runtime (similarly to Runtime::spawn) that can be later terminated by call to terminate_all(). /// Task output is ignored. - pub fn spawn_with_rt(&self, rt: ZRuntime, future: F) + pub fn spawn_with_rt(&self, rt: ZRuntime, future: F) -> JoinHandle<()> where F: Future + Send + 'static, T: Send + 'static, @@ -71,7 +71,7 @@ impl TaskController { _ = future => {} } }; - self.tracker.spawn_on(task, &rt); + self.tracker.spawn_on(task, &rt) } pub fn get_cancellation_token(&self) -> CancellationToken { @@ -80,24 +80,22 @@ impl TaskController { /// Spawns a task that can be cancelled via cancelling a token obtained by get_cancellation_token(). /// It can be later terminated by call to terminate_all(). - /// Task output is ignored. - pub fn spawn_cancellable(&self, future: F) + pub fn spawn_cancellable(&self, future: F) -> JoinHandle where F: Future + Send + 'static, T: Send + 'static, { - self.tracker.spawn(future); + self.tracker.spawn(future) } /// Spawns a task that can be cancelled via cancelling a token obtained by get_cancellation_token() using a specified runtime. /// It can be later terminated by call to terminate_all(). - /// Task output is ignored. - pub fn spawn_cancellable_with_rt(&self, rt: ZRuntime, future: F) + pub fn spawn_cancellable_with_rt(&self, rt: ZRuntime, future: F) -> JoinHandle where F: Future + Send + 'static, T: Send + 'static, { - self.tracker.spawn_on(future, &rt); + self.tracker.spawn_on(future, &rt) } /// Terminates all prevously spawned tasks @@ -105,6 +103,13 @@ impl TaskController { self.tracker.close(); self.token.cancel(); let tracker = self.tracker.clone(); - futures::executor::block_on( async move { tracker.wait().await } ); + futures::executor::block_on(async move { tracker.wait().await }); + } + + /// Terminates all prevously spawned tasks + pub async fn terminate_all_async(&self) { + self.tracker.close(); + self.token.cancel(); + self.tracker.wait().await; } } diff --git a/io/zenoh-transport/src/manager.rs b/io/zenoh-transport/src/manager.rs index f28509fc27..b66eeac501 100644 --- a/io/zenoh-transport/src/manager.rs +++ b/io/zenoh-transport/src/manager.rs @@ -321,7 +321,7 @@ pub struct TransportManager { pub(crate) new_unicast_link_sender: NewLinkChannelSender, #[cfg(feature = "stats")] pub(crate) stats: Arc, - task_controller: TaskController, + pub(crate) task_controller: TaskController, } impl TransportManager { diff --git a/io/zenoh-transport/src/multicast/transport.rs b/io/zenoh-transport/src/multicast/transport.rs index c647730390..0cc7efc1d6 100644 --- a/io/zenoh-transport/src/multicast/transport.rs +++ b/io/zenoh-transport/src/multicast/transport.rs @@ -39,6 +39,7 @@ use zenoh_protocol::{ transport::{close, Join}, }; use zenoh_result::{bail, ZResult}; +use zenoh_task::TaskController; // use zenoh_util::{Timed, TimedEvent, TimedHandle, Timer}; /*************************************/ @@ -82,8 +83,8 @@ pub(crate) struct TransportMulticastInner { pub(super) link: Arc>>, // The callback pub(super) callback: Arc>>>, - // token for safe cancellation - token: CancellationToken, + // Task controller for safe task cancellation + task_controller: TaskController, // Transport statistics #[cfg(feature = "stats")] pub(super) stats: Arc, @@ -115,7 +116,7 @@ impl TransportMulticastInner { locator: config.link.link.get_dst().to_owned(), link: Arc::new(RwLock::new(None)), callback: Arc::new(RwLock::new(None)), - token: CancellationToken::new(), + task_controller: TaskController::default(), #[cfg(feature = "stats")] stats, }; @@ -183,8 +184,7 @@ impl TransportMulticastInner { cb.closed(); } - // TODO(yuyuan): use CancellationToken to unify the termination with the above - self.token.cancel(); + self.task_controller.terminate_all_async().await; Ok(()) } @@ -369,7 +369,7 @@ impl TransportMulticastInner { // TODO(yuyuan): refine the clone behaviors let is_active = Arc::new(AtomicBool::new(false)); let c_is_active = is_active.clone(); - let token = self.token.child_token(); + let token = self.task_controller.get_cancellation_token(); let c_token = token.clone(); let c_self = self.clone(); let c_locator = locator.clone(); @@ -389,8 +389,7 @@ impl TransportMulticastInner { let _ = c_self.del_peer(&c_locator, close::reason::EXPIRED); }; - // TODO(yuyuan): Put it into TaskTracker or store as JoinHandle - zenoh_runtime::ZRuntime::Acceptor.spawn(task); + self.task_controller.spawn_cancellable_with_rt(zenoh_runtime::ZRuntime::Acceptor, task); // TODO(yuyuan): Integrate the above async task into TransportMulticastPeer // Store the new peer diff --git a/io/zenoh-transport/src/unicast/manager.rs b/io/zenoh-transport/src/unicast/manager.rs index eaf25cd2a3..8a63f4f630 100644 --- a/io/zenoh-transport/src/unicast/manager.rs +++ b/io/zenoh-transport/src/unicast/manager.rs @@ -744,17 +744,18 @@ impl TransportManager { // Spawn a task to accept the link let c_manager = self.clone(); - zenoh_runtime::ZRuntime::Acceptor.spawn(async move { - if let Err(e) = tokio::time::timeout( - c_manager.config.unicast.accept_timeout, - super::establishment::accept::accept_link(link, &c_manager), - ) - .await - { - log::debug!("{}", e); - } - incoming_counter.fetch_sub(1, SeqCst); - }); + self.task_controller + .spawn_with_rt(zenoh_runtime::ZRuntime::Acceptor, async move { + if let Err(e) = tokio::time::timeout( + c_manager.config.unicast.accept_timeout, + super::establishment::accept::accept_link(link, &c_manager), + ) + .await + { + log::debug!("{}", e); + } + incoming_counter.fetch_sub(1, SeqCst); + }); } } diff --git a/zenoh/src/net/runtime/mod.rs b/zenoh/src/net/runtime/mod.rs index 8351c8f352..78b30c5b12 100644 --- a/zenoh/src/net/runtime/mod.rs +++ b/zenoh/src/net/runtime/mod.rs @@ -31,7 +31,6 @@ use futures::Future; use std::any::Any; use std::sync::{Arc, Weak}; use tokio::task::JoinHandle; -use tokio_util::sync::CancellationToken; use uhlc::{HLCBuilder, HLC}; use zenoh_link::{EndPoint, Link}; use zenoh_plugin_trait::{PluginStartArgs, StructVersion}; @@ -39,6 +38,7 @@ use zenoh_protocol::core::{Locator, WhatAmI, ZenohId}; use zenoh_protocol::network::NetworkMessage; use zenoh_result::{bail, ZResult}; use zenoh_sync::get_mut_unchecked; +use zenoh_task::TaskController; use zenoh_transport::{ multicast::TransportMulticast, unicast::TransportUnicast, TransportEventHandler, TransportManager, TransportMulticastEventHandler, TransportPeer, TransportPeerEventHandler, @@ -54,7 +54,7 @@ struct RuntimeState { transport_handlers: std::sync::RwLock>>, locators: std::sync::RwLock>, hlc: Option>, - token: CancellationToken, + task_controller: TaskController, } pub struct WeakRuntime { @@ -130,7 +130,7 @@ impl Runtime { transport_handlers: std::sync::RwLock::new(vec![]), locators: std::sync::RwLock::new(vec![]), hlc, - token: CancellationToken::new(), + task_controller: TaskController::default(), }), }; *handler.runtime.write().unwrap() = Runtime::downgrade(&runtime); @@ -166,7 +166,7 @@ impl Runtime { pub async fn close(&self) -> ZResult<()> { log::trace!("Runtime::close())"); // TODO: Check this whether is able to terminate all spawned task by Runtime::spawn - self.state.token.cancel(); + self.state.task_controller.terminate_all(); self.manager().close().await; // clean up to break cyclic reference of self.state to itself self.state.transport_handlers.write().unwrap().clear(); @@ -186,13 +186,9 @@ impl Runtime { F: Future + Send + 'static, T: Send + 'static, { - let token = self.state.token.clone(); - zenoh_runtime::ZRuntime::Net.spawn(async move { - tokio::select! { - _ = token.cancelled() => {} - _ = future => {} - } - }) + self.state + .task_controller + .spawn_with_rt(zenoh_runtime::ZRuntime::Net, future) } pub(crate) fn router(&self) -> Arc { From 15b1c92e7d30ec0ce6286314d4901a6a862660b7 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Tue, 19 Mar 2024 15:13:37 +0000 Subject: [PATCH 08/38] clippy, fmt --- commons/zenoh-runtime/src/lib.rs | 4 ++-- io/zenoh-transport/src/multicast/transport.rs | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/commons/zenoh-runtime/src/lib.rs b/commons/zenoh-runtime/src/lib.rs index 8f11935bbe..41cfc6655c 100644 --- a/commons/zenoh-runtime/src/lib.rs +++ b/commons/zenoh-runtime/src/lib.rs @@ -162,9 +162,9 @@ pub unsafe fn zruntime_pool_free() { } #[doc(hidden)] -pub struct ZRuntimeGuard; +pub struct ZRuntimePoolGuard; -impl Drop for ZRuntimeGuard { +impl Drop for ZRuntimePoolGuard { fn drop(&mut self) { unsafe { zruntime_pool_free(); diff --git a/io/zenoh-transport/src/multicast/transport.rs b/io/zenoh-transport/src/multicast/transport.rs index 0cc7efc1d6..94564b74fd 100644 --- a/io/zenoh-transport/src/multicast/transport.rs +++ b/io/zenoh-transport/src/multicast/transport.rs @@ -389,7 +389,8 @@ impl TransportMulticastInner { let _ = c_self.del_peer(&c_locator, close::reason::EXPIRED); }; - self.task_controller.spawn_cancellable_with_rt(zenoh_runtime::ZRuntime::Acceptor, task); + self.task_controller + .spawn_cancellable_with_rt(zenoh_runtime::ZRuntime::Acceptor, task); // TODO(yuyuan): Integrate the above async task into TransportMulticastPeer // Store the new peer From 757cd487bc6a1ba034ba2a17c32a8eedab2a890c Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Thu, 21 Mar 2024 11:57:43 +0000 Subject: [PATCH 09/38] - add ci bash script to run valgrind test; - add manual resource clean up to Runtime::close(); - make certain task termination a bit cleaner; --- .gitignore | 2 + Cargo.toml | 4 +- ci/valgrind-check/Cargo.toml | 37 ++++++++++ ci/valgrind-check/run.sh | 18 +++++ .../src/pub_sub/bin/z_pub_sub.rs | 53 ++++++++++++++ .../src/queryable_get/bin/z_queryable_get.rs | 71 +++++++++++++++++++ .../zenoh-link-unixpipe/src/unix/unicast.rs | 8 ++- zenoh/src/net/routing/dispatcher/resource.rs | 11 +++ .../src/net/routing/hat/linkstate_peer/mod.rs | 10 +++ zenoh/src/net/routing/hat/router/mod.rs | 14 ++++ zenoh/src/net/runtime/mod.rs | 6 ++ 11 files changed, 230 insertions(+), 4 deletions(-) create mode 100644 ci/valgrind-check/Cargo.toml create mode 100755 ci/valgrind-check/run.sh create mode 100644 ci/valgrind-check/src/pub_sub/bin/z_pub_sub.rs create mode 100644 ci/valgrind-check/src/queryable_get/bin/z_queryable_get.rs diff --git a/.gitignore b/.gitignore index 695d0464b1..105dae1aa7 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ .vscode cargo-timing*.html + +ci/valgrind-check/*.log diff --git a/Cargo.toml b/Cargo.toml index b069989d68..95e6981415 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,7 +51,7 @@ members = [ "zenoh-ext", "zenohd", ] -exclude = ["ci/nostd-check"] +exclude = ["ci/nostd-check", "ci/valgrind-check"] [workspace.package] rust-version = "1.66.1" @@ -214,4 +214,4 @@ debug = false # If you want debug symbol in release mode, set the env variab lto = "fat" codegen-units = 1 opt-level = 3 -panic = "abort" +panic = "abort" \ No newline at end of file diff --git a/ci/valgrind-check/Cargo.toml b/ci/valgrind-check/Cargo.toml new file mode 100644 index 0000000000..cf6f6a844b --- /dev/null +++ b/ci/valgrind-check/Cargo.toml @@ -0,0 +1,37 @@ +# +# Copyright (c) 2024 ZettaScale Technology +# +# This program and the accompanying materials are made available under the +# terms of the Eclipse Public License 2.0 which is available at +# http://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0 +# which is available at https://www.apache.org/licenses/LICENSE-2.0. +# +# SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 +# +# Contributors: +# ZettaScale Zenoh Team, +# +[package] +name = "valgrind-check" +version = "0.1.0" +repository = "https://github.com/eclipse-zenoh/zenoh" +homepage = "http://zenoh.io" +license = "EPL-2.0 OR Apache-2.0" +edition = "2021" +categories = ["network-programming"] +description = "Internal crate for zenoh." + +[dependencies] +tokio = { version = "1.35.1", features = ["rt-multi-thread", "time", "io-std"] } +env_logger = "0.11.0" +futures = "0.3.25" +zenoh = { path = "../../zenoh/" } +zenoh-runtime = { path = "../../commons/zenoh-runtime/" } + +[[bin]] +name = "pub_sub" +path = "src/pub_sub/bin/z_pub_sub.rs" + +[[bin]] +name = "queryable_get" +path = "src/queryable_get/bin/z_queryable_get.rs" diff --git a/ci/valgrind-check/run.sh b/ci/valgrind-check/run.sh new file mode 100755 index 0000000000..d603233fd1 --- /dev/null +++ b/ci/valgrind-check/run.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +set -e +function check_leaks { + echo "Checking $1 for memory leaks" + valgrind --leak-check=full --num-callers=50 --log-file="$1_leaks.log" ./target/debug/$1 + num_leaks=$(grep 'ERROR SUMMARY: [0-9]+' -Eo "$1_leaks.log" | grep '[0-9]+' -Eo) + echo "Detected $num_leaks memory leaks" + if (( num_leaks == 0 )) + then + return 0 + else + return -1 + fi +} + +cargo build +check_leaks "queryable_get" +check_leaks "pub_sub" \ No newline at end of file diff --git a/ci/valgrind-check/src/pub_sub/bin/z_pub_sub.rs b/ci/valgrind-check/src/pub_sub/bin/z_pub_sub.rs new file mode 100644 index 0000000000..6cada97301 --- /dev/null +++ b/ci/valgrind-check/src/pub_sub/bin/z_pub_sub.rs @@ -0,0 +1,53 @@ +// +// Copyright (c) 2023 ZettaScale Technology +// +// This program and the accompanying materials are made available under the +// terms of the Eclipse Public License 2.0 which is available at +// http://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0 +// which is available at https://www.apache.org/licenses/LICENSE-2.0. +// +// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 +// +// Contributors: +// ZettaScale Zenoh Team, +// +use std::time::Duration; +use zenoh::config::Config; +use zenoh::prelude::r#async::*; + +#[tokio::main] +async fn main() { + let _z = zenoh_runtime::ZRuntimePoolGuard; + env_logger::init(); + + let pub_key_expr = KeyExpr::try_from("test/valgrind/data").unwrap(); + let sub_key_expr = KeyExpr::try_from("test/valgrind/**").unwrap(); + + println!("Declaring Publisher on '{pub_key_expr}'..."); + let pub_session = zenoh::open(Config::default()).res().await.unwrap(); + let publisher = pub_session.declare_publisher(&pub_key_expr).res().await.unwrap(); + + println!("Declaring Subscriber on '{sub_key_expr}'..."); + let sub_session = zenoh::open(Config::default()).res().await.unwrap(); + let _subscriber = sub_session + .declare_subscriber(&sub_key_expr) + .callback(|sample| { + println!( + ">> [Subscriber] Received {} ('{}': '{}')", + sample.kind, + sample.key_expr.as_str(), + sample.value + ); + }) + .res().await.unwrap(); + + + for idx in 0..5 { + tokio::time::sleep(Duration::from_secs(1)).await; + let buf = format!("[{idx:4}] data"); + println!("Putting Data ('{}': '{}')...", &pub_key_expr, buf); + publisher.put(buf).res().await.unwrap(); + } + + tokio::time::sleep(Duration::from_secs(1)).await; +} diff --git a/ci/valgrind-check/src/queryable_get/bin/z_queryable_get.rs b/ci/valgrind-check/src/queryable_get/bin/z_queryable_get.rs new file mode 100644 index 0000000000..dc1fb6450e --- /dev/null +++ b/ci/valgrind-check/src/queryable_get/bin/z_queryable_get.rs @@ -0,0 +1,71 @@ +// +// Copyright (c) 2023 ZettaScale Technology +// +// This program and the accompanying materials are made available under the +// terms of the Eclipse Public License 2.0 which is available at +// http://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0 +// which is available at https://www.apache.org/licenses/LICENSE-2.0. +// +// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 +// +// Contributors: +// ZettaScale Zenoh Team, +// +use std::convert::TryFrom; +use std::time::Duration; +use zenoh::config::Config; +use zenoh::prelude::r#async::*; + +#[tokio::main] +async fn main() { + let _z = zenoh_runtime::ZRuntimePoolGuard; + env_logger::init(); + + let queryable_key_expr = KeyExpr::try_from("test/valgrind/data").unwrap(); + let get_selector = Selector::try_from("test/valgrind/**").unwrap(); + + println!("Declaring Queryable on '{queryable_key_expr}'..."); + let queryable_session = zenoh::open(Config::default()).res().await.unwrap(); + let _queryable = queryable_session + .declare_queryable(&queryable_key_expr.clone()) + .callback(move |query| { + println!(">> Handling query '{}'", query.selector()); + let reply = Ok(Sample::new(queryable_key_expr.clone(), query.value().unwrap().clone())); + futures::executor::block_on(async move { + query + .reply(reply) + .res() + .await + .unwrap(); + } + ) + }) + .complete(true) + .res() + .await + .unwrap(); + + println!("Declaring Get session for '{get_selector}'..."); + let get_session = zenoh::open(Config::default()).res().await.unwrap(); + + for idx in 0..5 { + tokio::time::sleep(Duration::from_secs(1)).await; + println!("Sending Query '{get_selector}'..."); + let replies = get_session.get(&get_selector).with_value(idx) + .target(QueryTarget::All) + .res() + .await + .unwrap(); + while let Ok(reply) = replies.recv_async().await { + match reply.sample { + Ok(sample) => println!( + ">> Received ('{}': '{}')", + sample.key_expr.as_str(), + sample.value, + ), + Err(err) => println!(">> Received (ERROR: '{}')", String::try_from(&err).unwrap()), + } + } + } + tokio::time::sleep(Duration::from_secs(1)).await; +} \ No newline at end of file diff --git a/io/zenoh-links/zenoh-link-unixpipe/src/unix/unicast.rs b/io/zenoh-links/zenoh-link-unixpipe/src/unix/unicast.rs index 8f3577a8e9..d00d50d5ed 100644 --- a/io/zenoh-links/zenoh-link-unixpipe/src/unix/unicast.rs +++ b/io/zenoh-links/zenoh-link-unixpipe/src/unix/unicast.rs @@ -27,11 +27,12 @@ use std::io::ErrorKind; use std::io::{Read, Write}; use std::os::unix::fs::OpenOptionsExt; use std::sync::Arc; +use tokio::task::JoinHandle; use tokio::fs::remove_file; use tokio::io::unix::AsyncFd; use tokio::io::Interest; use tokio_util::sync::CancellationToken; -use zenoh_core::{zasyncread, zasyncwrite}; +use zenoh_core::{zasyncread, zasyncwrite, ResolveFuture, SyncResolve}; use zenoh_protocol::core::{EndPoint, Locator}; use zenoh_runtime::ZRuntime; @@ -285,6 +286,7 @@ async fn handle_incoming_connections( struct UnicastPipeListener { uplink_locator: Locator, token: CancellationToken, + handle: JoinHandle<()>, } impl UnicastPipeListener { async fn listen(endpoint: EndPoint, manager: Arc) -> ZResult { @@ -303,7 +305,7 @@ impl UnicastPipeListener { // WARN: The spawn_blocking is mandatory verified by the ping/pong test // create listening task - tokio::task::spawn_blocking(move || { + let handle = tokio::task::spawn_blocking(move || { ZRuntime::Acceptor.block_on(async move { loop { tokio::select! { @@ -325,11 +327,13 @@ impl UnicastPipeListener { Ok(Self { uplink_locator: local, token, + handle, }) } fn stop_listening(self) { self.token.cancel(); + let _ = ResolveFuture::new(async move { self.handle.await} ).res_sync(); } } diff --git a/zenoh/src/net/routing/dispatcher/resource.rs b/zenoh/src/net/routing/dispatcher/resource.rs index 1d9022bae1..2158387499 100644 --- a/zenoh/src/net/routing/dispatcher/resource.rs +++ b/zenoh/src/net/routing/dispatcher/resource.rs @@ -316,6 +316,17 @@ impl Resource { } } + pub fn close(self: &mut Arc) { + let r = get_mut_unchecked(self); + for (_s, c) in &mut r.childs { + Self::close(c); + } + r.parent.take(); + r.childs.clear(); + r.nonwild_prefix.take(); + r.session_ctxs.clear(); + } + #[cfg(test)] pub fn print_tree(from: &Arc) -> String { let mut result = from.expr(); diff --git a/zenoh/src/net/routing/hat/linkstate_peer/mod.rs b/zenoh/src/net/routing/hat/linkstate_peer/mod.rs index 020d796a1a..945c8cae87 100644 --- a/zenoh/src/net/routing/hat/linkstate_peer/mod.rs +++ b/zenoh/src/net/routing/hat/linkstate_peer/mod.rs @@ -50,6 +50,7 @@ use std::{ }; use tokio::task::JoinHandle; use zenoh_config::{unwrap_or_default, ModeDependent, WhatAmI, WhatAmIMatcher, ZenohId}; +use zenoh_core::{ResolveFuture, SyncResolve}; use zenoh_protocol::{ common::ZExtBody, network::{declare::queryable::ext::QueryableInfo, oam::id::OAM_LINKSTATE, Oam}, @@ -115,6 +116,15 @@ struct HatTables { peers_trees_task: Option>, } +impl Drop for HatTables { + fn drop(&mut self) { + if self.peers_trees_task.is_some() { + let task = self.peers_trees_task.take().unwrap(); + ResolveFuture::new(async move { let _ = task.await; }).res_sync(); + } + } +} + impl HatTables { fn new() -> Self { Self { diff --git a/zenoh/src/net/routing/hat/router/mod.rs b/zenoh/src/net/routing/hat/router/mod.rs index 5497afc9b8..7a9ac244ea 100644 --- a/zenoh/src/net/routing/hat/router/mod.rs +++ b/zenoh/src/net/routing/hat/router/mod.rs @@ -55,6 +55,7 @@ use std::{ }; use tokio::task::JoinHandle; use zenoh_config::{unwrap_or_default, ModeDependent, WhatAmI, WhatAmIMatcher, ZenohId}; +use zenoh_core::{ResolveFuture, SyncResolve}; use zenoh_protocol::{ common::ZExtBody, network::{declare::queryable::ext::QueryableInfo, oam::id::OAM_LINKSTATE, Oam}, @@ -126,6 +127,19 @@ struct HatTables { router_peers_failover_brokering: bool, } +impl Drop for HatTables { + fn drop(&mut self) { + if self.peers_trees_task.is_some() { + let task = self.peers_trees_task.take().unwrap(); + ResolveFuture::new(async move { let _ = task.await; }).res_sync(); + } + if self.routers_trees_task.is_some() { + let task = self.routers_trees_task.take().unwrap(); + ResolveFuture::new(async move { let _ = task.await; }).res_sync(); + } + } +} + impl HatTables { fn new(router_peers_failover_brokering: bool) -> Self { Self { diff --git a/zenoh/src/net/runtime/mod.rs b/zenoh/src/net/runtime/mod.rs index 78b30c5b12..f78b3ada12 100644 --- a/zenoh/src/net/runtime/mod.rs +++ b/zenoh/src/net/runtime/mod.rs @@ -170,6 +170,12 @@ impl Runtime { self.manager().close().await; // clean up to break cyclic reference of self.state to itself self.state.transport_handlers.write().unwrap().clear(); + // TODO: the call below is needed to prevent intermittent leak + // due to not freed resource Arc, that apparently happens because + // the task responsible for resource clean up was aborted earlier than expected. + // This should be resolved by identfying correspodning task, and placing + // cancelaltion token manually inside it. + self.router().tables.tables.write().unwrap().root_res.close(); Ok(()) } From a1beb288d0f6833af26571550d40f5b3d8f24174 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Thu, 21 Mar 2024 11:59:52 +0000 Subject: [PATCH 10/38] -fix fmt --- .../src/pub_sub/bin/z_pub_sub.rs | 13 +++++--- .../src/queryable_get/bin/z_queryable_get.rs | 32 +++++++++---------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/ci/valgrind-check/src/pub_sub/bin/z_pub_sub.rs b/ci/valgrind-check/src/pub_sub/bin/z_pub_sub.rs index 6cada97301..fac3437f39 100644 --- a/ci/valgrind-check/src/pub_sub/bin/z_pub_sub.rs +++ b/ci/valgrind-check/src/pub_sub/bin/z_pub_sub.rs @@ -22,10 +22,14 @@ async fn main() { let pub_key_expr = KeyExpr::try_from("test/valgrind/data").unwrap(); let sub_key_expr = KeyExpr::try_from("test/valgrind/**").unwrap(); - + println!("Declaring Publisher on '{pub_key_expr}'..."); let pub_session = zenoh::open(Config::default()).res().await.unwrap(); - let publisher = pub_session.declare_publisher(&pub_key_expr).res().await.unwrap(); + let publisher = pub_session + .declare_publisher(&pub_key_expr) + .res() + .await + .unwrap(); println!("Declaring Subscriber on '{sub_key_expr}'..."); let sub_session = zenoh::open(Config::default()).res().await.unwrap(); @@ -39,8 +43,9 @@ async fn main() { sample.value ); }) - .res().await.unwrap(); - + .res() + .await + .unwrap(); for idx in 0..5 { tokio::time::sleep(Duration::from_secs(1)).await; diff --git a/ci/valgrind-check/src/queryable_get/bin/z_queryable_get.rs b/ci/valgrind-check/src/queryable_get/bin/z_queryable_get.rs index dc1fb6450e..1307bb1819 100644 --- a/ci/valgrind-check/src/queryable_get/bin/z_queryable_get.rs +++ b/ci/valgrind-check/src/queryable_get/bin/z_queryable_get.rs @@ -23,22 +23,20 @@ async fn main() { let queryable_key_expr = KeyExpr::try_from("test/valgrind/data").unwrap(); let get_selector = Selector::try_from("test/valgrind/**").unwrap(); - + println!("Declaring Queryable on '{queryable_key_expr}'..."); let queryable_session = zenoh::open(Config::default()).res().await.unwrap(); let _queryable = queryable_session .declare_queryable(&queryable_key_expr.clone()) .callback(move |query| { println!(">> Handling query '{}'", query.selector()); - let reply = Ok(Sample::new(queryable_key_expr.clone(), query.value().unwrap().clone())); + let reply = Ok(Sample::new( + queryable_key_expr.clone(), + query.value().unwrap().clone(), + )); futures::executor::block_on(async move { - query - .reply(reply) - .res() - .await - .unwrap(); - } - ) + query.reply(reply).res().await.unwrap(); + }) }) .complete(true) .res() @@ -47,15 +45,17 @@ async fn main() { println!("Declaring Get session for '{get_selector}'..."); let get_session = zenoh::open(Config::default()).res().await.unwrap(); - + for idx in 0..5 { tokio::time::sleep(Duration::from_secs(1)).await; println!("Sending Query '{get_selector}'..."); - let replies = get_session.get(&get_selector).with_value(idx) - .target(QueryTarget::All) - .res() - .await - .unwrap(); + let replies = get_session + .get(&get_selector) + .with_value(idx) + .target(QueryTarget::All) + .res() + .await + .unwrap(); while let Ok(reply) = replies.recv_async().await { match reply.sample { Ok(sample) => println!( @@ -68,4 +68,4 @@ async fn main() { } } tokio::time::sleep(Duration::from_secs(1)).await; -} \ No newline at end of file +} From f45df41a0d5ef668db471d53364041534e0e69a0 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Thu, 21 Mar 2024 12:31:48 +0000 Subject: [PATCH 11/38] -fix fmt; update valgrind-check script --- ci/valgrind-check/run.sh | 9 ++++++--- .../zenoh-link-unixpipe/src/unix/unicast.rs | 4 ++-- zenoh/src/net/routing/hat/linkstate_peer/mod.rs | 5 ++++- zenoh/src/net/routing/hat/router/mod.rs | 10 ++++++++-- zenoh/src/net/runtime/mod.rs | 12 +++++++++--- 5 files changed, 29 insertions(+), 11 deletions(-) diff --git a/ci/valgrind-check/run.sh b/ci/valgrind-check/run.sh index d603233fd1..7e2a7dd1a8 100755 --- a/ci/valgrind-check/run.sh +++ b/ci/valgrind-check/run.sh @@ -1,18 +1,21 @@ #!/usr/bin/env bash set -e +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + function check_leaks { echo "Checking $1 for memory leaks" - valgrind --leak-check=full --num-callers=50 --log-file="$1_leaks.log" ./target/debug/$1 - num_leaks=$(grep 'ERROR SUMMARY: [0-9]+' -Eo "$1_leaks.log" | grep '[0-9]+' -Eo) + valgrind --leak-check=full --num-callers=50 --log-file="$SCRIPT_DIR/$1_leaks.log" $SCRIPT_DIR/target/debug/$1 + num_leaks=$(grep 'ERROR SUMMARY: [0-9]+' -Eo "$SCRIPT_DIR/$1_leaks.log" | grep '[0-9]+' -Eo) echo "Detected $num_leaks memory leaks" if (( num_leaks == 0 )) then return 0 else + cat $SCRIPT_DIR/$1_leaks.log return -1 fi } -cargo build +cargo build --manifest-path=$SCRIPT_DIR/Cargo.toml check_leaks "queryable_get" check_leaks "pub_sub" \ No newline at end of file diff --git a/io/zenoh-links/zenoh-link-unixpipe/src/unix/unicast.rs b/io/zenoh-links/zenoh-link-unixpipe/src/unix/unicast.rs index d00d50d5ed..8941a67d5a 100644 --- a/io/zenoh-links/zenoh-link-unixpipe/src/unix/unicast.rs +++ b/io/zenoh-links/zenoh-link-unixpipe/src/unix/unicast.rs @@ -27,10 +27,10 @@ use std::io::ErrorKind; use std::io::{Read, Write}; use std::os::unix::fs::OpenOptionsExt; use std::sync::Arc; -use tokio::task::JoinHandle; use tokio::fs::remove_file; use tokio::io::unix::AsyncFd; use tokio::io::Interest; +use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use zenoh_core::{zasyncread, zasyncwrite, ResolveFuture, SyncResolve}; use zenoh_protocol::core::{EndPoint, Locator}; @@ -333,7 +333,7 @@ impl UnicastPipeListener { fn stop_listening(self) { self.token.cancel(); - let _ = ResolveFuture::new(async move { self.handle.await} ).res_sync(); + let _ = ResolveFuture::new(async move { self.handle.await }).res_sync(); } } diff --git a/zenoh/src/net/routing/hat/linkstate_peer/mod.rs b/zenoh/src/net/routing/hat/linkstate_peer/mod.rs index 945c8cae87..b7a3cfa448 100644 --- a/zenoh/src/net/routing/hat/linkstate_peer/mod.rs +++ b/zenoh/src/net/routing/hat/linkstate_peer/mod.rs @@ -120,7 +120,10 @@ impl Drop for HatTables { fn drop(&mut self) { if self.peers_trees_task.is_some() { let task = self.peers_trees_task.take().unwrap(); - ResolveFuture::new(async move { let _ = task.await; }).res_sync(); + ResolveFuture::new(async move { + let _ = task.await; + }) + .res_sync(); } } } diff --git a/zenoh/src/net/routing/hat/router/mod.rs b/zenoh/src/net/routing/hat/router/mod.rs index 7a9ac244ea..a933f41060 100644 --- a/zenoh/src/net/routing/hat/router/mod.rs +++ b/zenoh/src/net/routing/hat/router/mod.rs @@ -131,11 +131,17 @@ impl Drop for HatTables { fn drop(&mut self) { if self.peers_trees_task.is_some() { let task = self.peers_trees_task.take().unwrap(); - ResolveFuture::new(async move { let _ = task.await; }).res_sync(); + ResolveFuture::new(async move { + let _ = task.await; + }) + .res_sync(); } if self.routers_trees_task.is_some() { let task = self.routers_trees_task.take().unwrap(); - ResolveFuture::new(async move { let _ = task.await; }).res_sync(); + ResolveFuture::new(async move { + let _ = task.await; + }) + .res_sync(); } } } diff --git a/zenoh/src/net/runtime/mod.rs b/zenoh/src/net/runtime/mod.rs index f78b3ada12..da11ea0906 100644 --- a/zenoh/src/net/runtime/mod.rs +++ b/zenoh/src/net/runtime/mod.rs @@ -173,9 +173,15 @@ impl Runtime { // TODO: the call below is needed to prevent intermittent leak // due to not freed resource Arc, that apparently happens because // the task responsible for resource clean up was aborted earlier than expected. - // This should be resolved by identfying correspodning task, and placing - // cancelaltion token manually inside it. - self.router().tables.tables.write().unwrap().root_res.close(); + // This should be resolved by identfying correspodning task, and placing + // cancelaltion token manually inside it. + self.router() + .tables + .tables + .write() + .unwrap() + .root_res + .close(); Ok(()) } From 04aeeed3d3ea3985e1bcd1fbccfb3cecec55574e Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Thu, 21 Mar 2024 12:35:21 +0000 Subject: [PATCH 12/38] clippy --- io/zenoh-links/zenoh-link-unixpipe/src/unix/unicast.rs | 2 +- zenoh/src/net/routing/dispatcher/resource.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/io/zenoh-links/zenoh-link-unixpipe/src/unix/unicast.rs b/io/zenoh-links/zenoh-link-unixpipe/src/unix/unicast.rs index 8941a67d5a..b6d4195927 100644 --- a/io/zenoh-links/zenoh-link-unixpipe/src/unix/unicast.rs +++ b/io/zenoh-links/zenoh-link-unixpipe/src/unix/unicast.rs @@ -333,7 +333,7 @@ impl UnicastPipeListener { fn stop_listening(self) { self.token.cancel(); - let _ = ResolveFuture::new(async move { self.handle.await }).res_sync(); + let _ = ResolveFuture::new(self.handle).res_sync(); } } diff --git a/zenoh/src/net/routing/dispatcher/resource.rs b/zenoh/src/net/routing/dispatcher/resource.rs index 2158387499..1762ff2cb4 100644 --- a/zenoh/src/net/routing/dispatcher/resource.rs +++ b/zenoh/src/net/routing/dispatcher/resource.rs @@ -318,7 +318,7 @@ impl Resource { pub fn close(self: &mut Arc) { let r = get_mut_unchecked(self); - for (_s, c) in &mut r.childs { + for c in r.childs.values_mut() { Self::close(c); } r.parent.take(); From e7134e5b89e1ab532cbdb00841e1648dac13816e Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Thu, 21 Mar 2024 15:16:33 +0000 Subject: [PATCH 13/38] drop runtimes instead of shutdown_background --- commons/zenoh-runtime/Cargo.toml | 1 + commons/zenoh-runtime/src/lib.rs | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/commons/zenoh-runtime/Cargo.toml b/commons/zenoh-runtime/Cargo.toml index b7aa15d634..e5bd64b8c5 100644 --- a/commons/zenoh-runtime/Cargo.toml +++ b/commons/zenoh-runtime/Cargo.toml @@ -13,6 +13,7 @@ description = { workspace = true } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +futures = { workspace = true } lazy_static = { workspace = true } zenoh-result = { workspace = true, features = ["std"] } zenoh-collections = { workspace = true, features = ["std"] } diff --git a/commons/zenoh-runtime/src/lib.rs b/commons/zenoh-runtime/src/lib.rs index 41cfc6655c..ebfd24ac4d 100644 --- a/commons/zenoh-runtime/src/lib.rs +++ b/commons/zenoh-runtime/src/lib.rs @@ -1,3 +1,4 @@ +use ::futures::executor; // // Copyright (c) 2024 ZettaScale Technology // @@ -145,20 +146,22 @@ impl ZRuntimePool { /// Force drops ZRUNTIME_POOL. /// /// Rust does not drop static variables. They are always reported by valgrind for exampler. -/// This function can be used force drop ZRUNTIME_POOL, to prevent valgrind reporting memory leaks related to it. +/// This function can be used to force drop ZRUNTIME_POOL, to prevent valgrind reporting memory leaks related to it. +/// If there are any blocking tasks spawned by ZRuntimes, the function will block until they return. #[doc(hidden)] pub unsafe fn zruntime_pool_free() { - let hm = &ZRUNTIME_POOL.0 as *const HashMap> - as *mut HashMap>; - for (_k, v) in hm.as_mut().unwrap().drain() { - if v.get().is_some() { + // the runtime can only be dropped in the blocking context + let _ = executor::block_on(tokio::task::spawn_blocking(|| { + let hm = &ZRUNTIME_POOL.0 as *const HashMap> + as *mut HashMap>; + for (_k, v) in hm.as_mut().unwrap().drain() { let r = v.get().unwrap(); let r_mut = (r as *const Runtime) as *mut Runtime; - r_mut.read().shutdown_background(); + std::mem::drop(r_mut.read()); std::mem::forget(v); } - } - std::mem::drop(hm.read()); + std::mem::drop(hm.read()); + })); } #[doc(hidden)] From 5c672cd8e183dae88c832f2fb5f9e1f783b28449 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Thu, 21 Mar 2024 15:28:39 +0000 Subject: [PATCH 14/38] include memory-leaks test in ci --- .github/workflows/ci.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 862f5edd8b..8b258e5d17 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -102,6 +102,16 @@ jobs: - name: Run doctests run: cargo test --doc + - name: Install valgrind + if: ${{ matrix.os == 'ubuntu-latest' }} + run: apt-get install -y valgrind + shell: bash + + - name: Run memory leaks check + if: ${{ matrix.os == 'ubuntu-latest' }} + run: ci/valgrind-check/run.sh + shell: bash + # NOTE: In GitHub repository settings, the "Require status checks to pass # before merging" branch protection rule ensures that commits are only merged # from branches where specific status checks have passed. These checks are From ca053132009d44cb4a99a596126afd12793a66b7 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Thu, 21 Mar 2024 15:45:09 +0000 Subject: [PATCH 15/38] add sudo before valgrind install in ci --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8b258e5d17..e147d11856 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -104,7 +104,7 @@ jobs: - name: Install valgrind if: ${{ matrix.os == 'ubuntu-latest' }} - run: apt-get install -y valgrind + run: sudo apt-get install -y valgrind shell: bash - name: Run memory leaks check From a94e86f7cc70e09d399efb7c81fe9f1ef07547e0 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Thu, 21 Mar 2024 17:34:42 +0000 Subject: [PATCH 16/38] replace ZRuntime drop with shutdown_timeout to prevent infinite wait time, if there any active blocking tasks left --- commons/zenoh-runtime/src/lib.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/commons/zenoh-runtime/src/lib.rs b/commons/zenoh-runtime/src/lib.rs index ebfd24ac4d..c411e40a46 100644 --- a/commons/zenoh-runtime/src/lib.rs +++ b/commons/zenoh-runtime/src/lib.rs @@ -1,4 +1,3 @@ -use ::futures::executor; // // Copyright (c) 2024 ZettaScale Technology // @@ -22,6 +21,7 @@ use std::{ atomic::{AtomicUsize, Ordering}, OnceLock, }, + time::Duration, }; use tokio::runtime::{Handle, Runtime}; use zenoh_collections::Properties; @@ -145,23 +145,24 @@ impl ZRuntimePool { /// Force drops ZRUNTIME_POOL. /// -/// Rust does not drop static variables. They are always reported by valgrind for exampler. +/// Rust does not drop static variables. They are always reported by valgrind for example. /// This function can be used to force drop ZRUNTIME_POOL, to prevent valgrind reporting memory leaks related to it. /// If there are any blocking tasks spawned by ZRuntimes, the function will block until they return. #[doc(hidden)] pub unsafe fn zruntime_pool_free() { - // the runtime can only be dropped in the blocking context - let _ = executor::block_on(tokio::task::spawn_blocking(|| { + let fut = || { let hm = &ZRUNTIME_POOL.0 as *const HashMap> as *mut HashMap>; for (_k, v) in hm.as_mut().unwrap().drain() { let r = v.get().unwrap(); let r_mut = (r as *const Runtime) as *mut Runtime; - std::mem::drop(r_mut.read()); + r_mut.read().shutdown_timeout(Duration::from_secs(1)); std::mem::forget(v); } std::mem::drop(hm.read()); - })); + }; + // the runtime can only be dropped in the blocking context + let _ = futures::executor::block_on(tokio::task::spawn_blocking(fut)); } #[doc(hidden)] From 918b1f89708363726b6f1662fb209690a08805be Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Fri, 22 Mar 2024 10:12:48 +0000 Subject: [PATCH 17/38] docs update --- commons/zenoh-task/src/lib.rs | 22 ++++++++++++++-------- zenoh/src/net/runtime/mod.rs | 2 ++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/commons/zenoh-task/src/lib.rs b/commons/zenoh-task/src/lib.rs index 3d665f3035..35ae333a77 100644 --- a/commons/zenoh-task/src/lib.rs +++ b/commons/zenoh-task/src/lib.rs @@ -40,8 +40,8 @@ impl Default for TaskController { } impl TaskController { - /// Spawns a task (similarly to task::spawn) that can be later terminated by call to terminate_all() - /// Task output is ignored + /// Spawns a task that can be later terminated by call to [`TaskController::terminate_all()`]. + /// Task output is ignored. pub fn spawn(&self, future: F) -> JoinHandle<()> where F: Future + Send + 'static, @@ -57,7 +57,7 @@ impl TaskController { self.tracker.spawn(task) } - /// Spawns a task using a specified runtime (similarly to Runtime::spawn) that can be later terminated by call to terminate_all(). + /// Spawns a task using a specified runtime that can be later terminated by call to [`TaskController::terminate_all()`]. /// Task output is ignored. pub fn spawn_with_rt(&self, rt: ZRuntime, future: F) -> JoinHandle<()> where @@ -78,8 +78,9 @@ impl TaskController { self.token.child_token() } - /// Spawns a task that can be cancelled via cancelling a token obtained by get_cancellation_token(). - /// It can be later terminated by call to terminate_all(). + /// Spawns a task that can be cancelled via cancelling a token obtained by [`TaskController::get_cancellation_token()`], + /// or that can run to completion in finite amount of time. + /// It can be later terminated by call to [`TaskController::terminate_all()`]. pub fn spawn_cancellable(&self, future: F) -> JoinHandle where F: Future + Send + 'static, @@ -88,8 +89,9 @@ impl TaskController { self.tracker.spawn(future) } - /// Spawns a task that can be cancelled via cancelling a token obtained by get_cancellation_token() using a specified runtime. - /// It can be later terminated by call to terminate_all(). + /// Spawns a task that can be cancelled via cancelling a token obtained by [`TaskController::get_cancellation_token()`], + /// or that can run to completion in finite amount of time, using a specified runtime. + /// It can be later aborted by call to [`TaskController::terminate_all()`]. pub fn spawn_cancellable_with_rt(&self, rt: ZRuntime, future: F) -> JoinHandle where F: Future + Send + 'static, @@ -99,6 +101,10 @@ impl TaskController { } /// Terminates all prevously spawned tasks + /// The caller must ensure that all tasks spawned with [`TaskController::spawn_cancellable()`] + /// or [`TaskController::spawn_cancellable_with_rt()`] can yield in finite amount of time either because they will run to completion + /// or due to cancellation of token acquired via [`TaskController::get_cancellation_token()`]. + /// Tasks spawned with [`TaskController::spawn()`] or [`TaskController::spawn_with_rt()`] will be aborted (i.e. terminated upon next await call). pub fn terminate_all(&self) { self.tracker.close(); self.token.cancel(); @@ -106,7 +112,7 @@ impl TaskController { futures::executor::block_on(async move { tracker.wait().await }); } - /// Terminates all prevously spawned tasks + /// Async version of [`TaskController::terminate_all()`]. pub async fn terminate_all_async(&self) { self.tracker.close(); self.token.cancel(); diff --git a/zenoh/src/net/runtime/mod.rs b/zenoh/src/net/runtime/mod.rs index da11ea0906..128ee6cd87 100644 --- a/zenoh/src/net/runtime/mod.rs +++ b/zenoh/src/net/runtime/mod.rs @@ -193,6 +193,8 @@ impl Runtime { self.state.locators.read().unwrap().clone() } + /// Spawns a task within runtime. + /// Upon runtime close the task will be automatically aborted. pub(crate) fn spawn(&self, future: F) -> JoinHandle<()> where F: Future + Send + 'static, From 1b4622fc97a57a0b71a0f1a3f4fe1736e3925ce4 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Mon, 25 Mar 2024 21:54:54 +0000 Subject: [PATCH 18/38] - rename spawn -> spawn_abortable, spawn_cancellable -> spawn, for more clarity; - Some tasks now terminate in more graceful way --- commons/zenoh-task/Cargo.toml | 1 + commons/zenoh-task/src/lib.rs | 45 +++++++---- io/zenoh-transport/src/manager.rs | 12 ++- io/zenoh-transport/src/multicast/transport.rs | 2 +- zenoh/src/net/runtime/mod.rs | 40 ++++++++-- zenoh/src/net/runtime/orchestrator.rs | 75 ++++++++++--------- 6 files changed, 116 insertions(+), 59 deletions(-) diff --git a/commons/zenoh-task/Cargo.toml b/commons/zenoh-task/Cargo.toml index a4b7ad21f8..99a8bd5612 100644 --- a/commons/zenoh-task/Cargo.toml +++ b/commons/zenoh-task/Cargo.toml @@ -27,5 +27,6 @@ description = "Internal crate for zenoh." [dependencies] tokio = { workspace = true, features = ["default", "sync"] } futures = { workspace = true } +log = { workspace = true } zenoh-runtime = { workspace = true } tokio-util = { workspace = true, features = ["rt"] } \ No newline at end of file diff --git a/commons/zenoh-task/src/lib.rs b/commons/zenoh-task/src/lib.rs index 35ae333a77..4b494e6444 100644 --- a/commons/zenoh-task/src/lib.rs +++ b/commons/zenoh-task/src/lib.rs @@ -19,10 +19,12 @@ //! [Click here for Zenoh's documentation](../zenoh/index.html) use std::future::Future; +use std::time::Duration; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tokio_util::task::TaskTracker; use zenoh_runtime::ZRuntime; +use futures::future::FutureExt; #[derive(Clone)] pub struct TaskController { @@ -42,7 +44,7 @@ impl Default for TaskController { impl TaskController { /// Spawns a task that can be later terminated by call to [`TaskController::terminate_all()`]. /// Task output is ignored. - pub fn spawn(&self, future: F) -> JoinHandle<()> + pub fn spawn_abortable(&self, future: F) -> JoinHandle<()> where F: Future + Send + 'static, T: Send + 'static, @@ -59,7 +61,7 @@ impl TaskController { /// Spawns a task using a specified runtime that can be later terminated by call to [`TaskController::terminate_all()`]. /// Task output is ignored. - pub fn spawn_with_rt(&self, rt: ZRuntime, future: F) -> JoinHandle<()> + pub fn spawn_abortable_with_rt(&self, rt: ZRuntime, future: F) -> JoinHandle<()> where F: Future + Send + 'static, T: Send + 'static, @@ -78,44 +80,59 @@ impl TaskController { self.token.child_token() } - /// Spawns a task that can be cancelled via cancelling a token obtained by [`TaskController::get_cancellation_token()`], + /// Spawns a task that can be cancelled via cancellation of a token obtained by [`TaskController::get_cancellation_token()`], /// or that can run to completion in finite amount of time. /// It can be later terminated by call to [`TaskController::terminate_all()`]. - pub fn spawn_cancellable(&self, future: F) -> JoinHandle + pub fn spawn(&self, future: F) -> JoinHandle<()> where F: Future + Send + 'static, T: Send + 'static, { - self.tracker.spawn(future) + self.tracker.spawn(future.map(|_f| ())) } - /// Spawns a task that can be cancelled via cancelling a token obtained by [`TaskController::get_cancellation_token()`], + /// Spawns a task that can be cancelled via cancellation of a token obtained by [`TaskController::get_cancellation_token()`], /// or that can run to completion in finite amount of time, using a specified runtime. /// It can be later aborted by call to [`TaskController::terminate_all()`]. - pub fn spawn_cancellable_with_rt(&self, rt: ZRuntime, future: F) -> JoinHandle + pub fn spawn_with_rt(&self, rt: ZRuntime, future: F) -> JoinHandle<()> where F: Future + Send + 'static, T: Send + 'static, { - self.tracker.spawn_on(future, &rt) + self.tracker.spawn_on(future.map(|_f| ()), &rt) } /// Terminates all prevously spawned tasks - /// The caller must ensure that all tasks spawned with [`TaskController::spawn_cancellable()`] - /// or [`TaskController::spawn_cancellable_with_rt()`] can yield in finite amount of time either because they will run to completion + /// The caller must ensure that all tasks spawned with [`TaskController::spawn()`] + /// or [`TaskController::spawn_with_rt()`] can yield in finite amount of time either because they will run to completion /// or due to cancellation of token acquired via [`TaskController::get_cancellation_token()`]. - /// Tasks spawned with [`TaskController::spawn()`] or [`TaskController::spawn_with_rt()`] will be aborted (i.e. terminated upon next await call). + /// Tasks spawned with [`TaskController::spawn_abortable()`] or [`TaskController::spawn_abortable_with_rt()`] will be aborted (i.e. terminated upon next await call). pub fn terminate_all(&self) { self.tracker.close(); self.token.cancel(); - let tracker = self.tracker.clone(); - futures::executor::block_on(async move { tracker.wait().await }); + let task = async move { + tokio::select! { + _ = tokio::time::sleep(Duration::from_secs(10)) => { + log::error!("Failed to terminate {} tasks", self.tracker.len()); + } + _ = self.tracker.wait() => {} + } + }; + let _ = futures::executor::block_on(task); } /// Async version of [`TaskController::terminate_all()`]. pub async fn terminate_all_async(&self) { self.tracker.close(); self.token.cancel(); - self.tracker.wait().await; + let task = async move { + tokio::select! { + _ = tokio::time::sleep(Duration::from_secs(10)) => { + log::error!("Failed to terminate {} tasks", self.tracker.len()); + } + _ = self.tracker.wait() => {} + } + }; + task.await; } } diff --git a/io/zenoh-transport/src/manager.rs b/io/zenoh-transport/src/manager.rs index b66eeac501..63934c379d 100644 --- a/io/zenoh-transport/src/manager.rs +++ b/io/zenoh-transport/src/manager.rs @@ -347,14 +347,20 @@ impl TransportManager { }; // @TODO: this should be moved into the unicast module + let cancellation_token = this.task_controller.get_cancellation_token(); this.task_controller .spawn_with_rt(zenoh_runtime::ZRuntime::Net, { let this = this.clone(); async move { loop { - if let Ok(link) = new_unicast_link_receiver.recv_async().await { - this.handle_new_link_unicast(link).await; - } + tokio::select! { + res = new_unicast_link_receiver.recv_async() => { + if let Ok(link) = res { + this.handle_new_link_unicast(link).await; + } + } + _ = cancellation_token.cancelled() => { break; } + } } } }); diff --git a/io/zenoh-transport/src/multicast/transport.rs b/io/zenoh-transport/src/multicast/transport.rs index 94564b74fd..f2df1b4061 100644 --- a/io/zenoh-transport/src/multicast/transport.rs +++ b/io/zenoh-transport/src/multicast/transport.rs @@ -390,7 +390,7 @@ impl TransportMulticastInner { }; self.task_controller - .spawn_cancellable_with_rt(zenoh_runtime::ZRuntime::Acceptor, task); + .spawn_with_rt(zenoh_runtime::ZRuntime::Acceptor, task); // TODO(yuyuan): Integrate the above async task into TransportMulticastPeer // Store the new peer diff --git a/zenoh/src/net/runtime/mod.rs b/zenoh/src/net/runtime/mod.rs index 128ee6cd87..1629a80321 100644 --- a/zenoh/src/net/runtime/mod.rs +++ b/zenoh/src/net/runtime/mod.rs @@ -28,6 +28,7 @@ use crate::GIT_VERSION; pub use adminspace::AdminSpace; use futures::stream::StreamExt; use futures::Future; +use tokio_util::sync::CancellationToken; use std::any::Any; use std::sync::{Arc, Weak}; use tokio::task::JoinHandle; @@ -137,15 +138,26 @@ impl Runtime { get_mut_unchecked(&mut runtime.state.router.clone()).init_link_state(runtime.clone()); let receiver = config.subscribe(); + let token = runtime.get_cancellation_token(); runtime.spawn({ let runtime2 = runtime.clone(); async move { let mut stream = receiver.into_stream(); - while let Some(event) = stream.next().await { - if &*event == "connect/endpoints" { - if let Err(e) = runtime2.update_peers().await { - log::error!("Error updating peers: {}", e); + loop { + tokio::select! { + res = stream.next() => { + match res { + Some(event) => { + if &*event == "connect/endpoints" { + if let Err(e) = runtime2.update_peers().await { + log::error!("Error updating peers: {}", e); + } + } + }, + None => { break; } + } } + _ = token.cancelled() => { break; } } } } @@ -174,7 +186,7 @@ impl Runtime { // due to not freed resource Arc, that apparently happens because // the task responsible for resource clean up was aborted earlier than expected. // This should be resolved by identfying correspodning task, and placing - // cancelaltion token manually inside it. + // cancellation token manually inside it. self.router() .tables .tables @@ -194,7 +206,7 @@ impl Runtime { } /// Spawns a task within runtime. - /// Upon runtime close the task will be automatically aborted. + /// Upon close runtime will block until this task completes pub(crate) fn spawn(&self, future: F) -> JoinHandle<()> where F: Future + Send + 'static, @@ -205,6 +217,18 @@ impl Runtime { .spawn_with_rt(zenoh_runtime::ZRuntime::Net, future) } + /// Spawns a task within runtime. + /// Upon runtime close the task will be automatically aborted. + pub(crate) fn spawn_abortable(&self, future: F) -> JoinHandle<()> + where + F: Future + Send + 'static, + T: Send + 'static, + { + self.state + .task_controller + .spawn_abortable_with_rt(zenoh_runtime::ZRuntime::Net, future) + } + pub(crate) fn router(&self) -> Arc { self.state.router.clone() } @@ -230,6 +254,10 @@ impl Runtime { state: Arc::downgrade(&this.state), } } + + pub fn get_cancellation_token(&self) -> CancellationToken { + self.state.task_controller.get_cancellation_token() + } } struct RuntimeTransportEventHandler { diff --git a/zenoh/src/net/runtime/orchestrator.rs b/zenoh/src/net/runtime/orchestrator.rs index 298548f3b7..accb09f1c8 100644 --- a/zenoh/src/net/runtime/orchestrator.rs +++ b/zenoh/src/net/runtime/orchestrator.rs @@ -220,7 +220,7 @@ impl Runtime { let this = self.clone(); match (listen, autoconnect.is_empty()) { (true, false) => { - self.spawn(async move { + self.spawn_abortable(async move { tokio::select! { _ = this.responder(&mcast_socket, &sockets) => {}, _ = this.connect_all(&sockets, autoconnect, &addr) => {}, @@ -228,12 +228,12 @@ impl Runtime { }); } (true, true) => { - self.spawn(async move { + self.spawn_abortable(async move { this.responder(&mcast_socket, &sockets).await; }); } (false, false) => { - self.spawn( + self.spawn_abortable( async move { this.connect_all(&sockets, autoconnect, &addr).await }, ); } @@ -479,43 +479,44 @@ impl Runtime { async fn peer_connector(&self, peer: EndPoint) { let mut delay = CONNECTION_RETRY_INITIAL_PERIOD; + let cancellation_token = self.get_cancellation_token(); loop { log::trace!("Trying to connect to configured peer {}", peer); let endpoint = peer.clone(); - match tokio::time::timeout( - CONNECTION_TIMEOUT, - self.manager().open_transport_unicast(endpoint), - ) - .await - { - Ok(Ok(transport)) => { - log::debug!("Successfully connected to configured peer {}", peer); - if let Ok(Some(orch_transport)) = transport.get_callback() { - if let Some(orch_transport) = orch_transport - .as_any() - .downcast_ref::() - { - *zwrite!(orch_transport.endpoint) = Some(peer); + tokio::select! { + res = tokio::time::timeout(CONNECTION_TIMEOUT, self.manager().open_transport_unicast(endpoint)) => { + match res { + Ok(Ok(transport)) => { + log::debug!("Successfully connected to configured peer {}", peer); + if let Ok(Some(orch_transport)) = transport.get_callback() { + if let Some(orch_transport) = orch_transport + .as_any() + .downcast_ref::() + { + *zwrite!(orch_transport.endpoint) = Some(peer); + } + } + break; + } + Ok(Err(e)) => { + log::debug!( + "Unable to connect to configured peer {}! {}. Retry in {:?}.", + peer, + e, + delay + ); + } + Err(e) => { + log::debug!( + "Unable to connect to configured peer {}! {}. Retry in {:?}.", + peer, + e, + delay + ); } } - break; - } - Ok(Err(e)) => { - log::debug!( - "Unable to connect to configured peer {}! {}. Retry in {:?}.", - peer, - e, - delay - ); - } - Err(e) => { - log::debug!( - "Unable to connect to configured peer {}! {}. Retry in {:?}.", - peer, - e, - delay - ); } + _ = cancellation_token.cancelled() => { break; } } tokio::time::sleep(delay).await; delay *= CONNECTION_RETRY_PERIOD_INCREASE_FACTOR; @@ -842,10 +843,14 @@ impl Runtime { match session.runtime.whatami() { WhatAmI::Client => { let runtime = session.runtime.clone(); + let cancellation_token = runtime.get_cancellation_token(); session.runtime.spawn(async move { let mut delay = CONNECTION_RETRY_INITIAL_PERIOD; while runtime.start_client().await.is_err() { - tokio::time::sleep(delay).await; + tokio::select! { + _ = tokio::time::sleep(delay) => {} + _ = cancellation_token.cancelled() => { break; } + } delay *= CONNECTION_RETRY_PERIOD_INCREASE_FACTOR; if delay > CONNECTION_RETRY_MAX_PERIOD { delay = CONNECTION_RETRY_MAX_PERIOD; From 13ae47b4c221d900deef3ac1263fa3dcbe672d2a Mon Sep 17 00:00:00 2001 From: yuanyuyuan Date: Tue, 26 Mar 2024 15:48:39 +0800 Subject: [PATCH 19/38] Simplify the drop of ZRUNTIME_POOL --- commons/zenoh-runtime/src/lib.rs | 40 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/commons/zenoh-runtime/src/lib.rs b/commons/zenoh-runtime/src/lib.rs index 5c4a88b736..7ba218e007 100644 --- a/commons/zenoh-runtime/src/lib.rs +++ b/commons/zenoh-runtime/src/lib.rs @@ -148,35 +148,39 @@ impl ZRuntimePool { } } +impl Drop for ZRuntimePool { + fn drop(&mut self) { + let handles: Vec<_> = self + .0 + .drain() + .map(|(name, mut rt)| { + std::thread::spawn(move || { + rt.take() + .expect(&format!("ZRuntime {name:?} failed to shutdown.")) + .shutdown_timeout(Duration::from_secs(1)) + }) + }) + .collect(); + + for hd in handles { + let _ = hd.join(); + } + } +} + /// Force drops ZRUNTIME_POOL. /// /// Rust does not drop static variables. They are always reported by valgrind for example. /// This function can be used to force drop ZRUNTIME_POOL, to prevent valgrind reporting memory leaks related to it. /// If there are any blocking tasks spawned by ZRuntimes, the function will block until they return. -#[doc(hidden)] -pub unsafe fn zruntime_pool_free() { - let fut = || { - let hm = &ZRUNTIME_POOL.0 as *const HashMap> - as *mut HashMap>; - for (_k, v) in hm.as_mut().unwrap().drain() { - let r = v.get().unwrap(); - let r_mut = (r as *const Runtime) as *mut Runtime; - r_mut.read().shutdown_timeout(Duration::from_secs(1)); - std::mem::forget(v); - } - std::mem::drop(hm.read()); - }; - // the runtime can only be dropped in the blocking context - let _ = futures::executor::block_on(tokio::task::spawn_blocking(fut)); -} - #[doc(hidden)] pub struct ZRuntimePoolGuard; impl Drop for ZRuntimePoolGuard { fn drop(&mut self) { unsafe { - zruntime_pool_free(); + let ptr = &(*ZRUNTIME_POOL) as *const ZRuntimePool; + std::mem::drop(ptr.read()); } } } From 93eeacffa999ed718f045de98df39912a91e576e Mon Sep 17 00:00:00 2001 From: yuanyuyuan Date: Tue, 26 Mar 2024 22:31:27 +0800 Subject: [PATCH 20/38] Update the doc --- commons/zenoh-runtime/src/lib.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/commons/zenoh-runtime/src/lib.rs b/commons/zenoh-runtime/src/lib.rs index 7ba218e007..6b62fdf7b7 100644 --- a/commons/zenoh-runtime/src/lib.rs +++ b/commons/zenoh-runtime/src/lib.rs @@ -148,6 +148,7 @@ impl ZRuntimePool { } } +// If there are any blocking tasks spawned by ZRuntimes, the function will block until they return. impl Drop for ZRuntimePool { fn drop(&mut self) { let handles: Vec<_> = self @@ -156,7 +157,7 @@ impl Drop for ZRuntimePool { .map(|(name, mut rt)| { std::thread::spawn(move || { rt.take() - .expect(&format!("ZRuntime {name:?} failed to shutdown.")) + .unwrap_or_else(|| panic!("ZRuntime {name:?} failed to shutdown.")) .shutdown_timeout(Duration::from_secs(1)) }) }) @@ -168,11 +169,8 @@ impl Drop for ZRuntimePool { } } -/// Force drops ZRUNTIME_POOL. -/// -/// Rust does not drop static variables. They are always reported by valgrind for example. -/// This function can be used to force drop ZRUNTIME_POOL, to prevent valgrind reporting memory leaks related to it. -/// If there are any blocking tasks spawned by ZRuntimes, the function will block until they return. +/// In order to prevent valgrind reporting memory leaks, +/// we use this guard to force drop ZRUNTIME_POOL since Rust does not drop static variables. #[doc(hidden)] pub struct ZRuntimePoolGuard; From 9841bea2613db881968f8912a2eb30edcc3bfbd5 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Tue, 26 Mar 2024 18:07:01 +0000 Subject: [PATCH 21/38] - use ZRuntime executor instead of futures::executor - terminate more tasks - make TaskController::terminate_all[_async] accept timeout duration --- .../src/queryable_get/bin/z_queryable_get.rs | 6 +- commons/zenoh-task/Cargo.toml | 1 + commons/zenoh-task/src/lib.rs | 89 ++++++++++--- io/zenoh-transport/src/manager.rs | 4 +- io/zenoh-transport/src/multicast/transport.rs | 4 +- zenoh-ext/Cargo.toml | 1 + zenoh-ext/src/publication_cache.rs | 123 +++++++++--------- zenoh/src/net/routing/dispatcher/face.rs | 3 + zenoh/src/net/routing/dispatcher/queries.rs | 28 ++-- zenoh/src/net/routing/dispatcher/tables.rs | 1 + .../src/net/routing/hat/linkstate_peer/mod.rs | 51 ++++---- zenoh/src/net/routing/hat/router/mod.rs | 76 ++++++----- zenoh/src/net/runtime/mod.rs | 5 +- zenoh/src/net/tests/tables.rs | 4 +- zenoh/src/session.rs | 31 +++-- zenoh/tests/routing.rs | 2 +- 16 files changed, 257 insertions(+), 172 deletions(-) diff --git a/ci/valgrind-check/src/queryable_get/bin/z_queryable_get.rs b/ci/valgrind-check/src/queryable_get/bin/z_queryable_get.rs index 1307bb1819..102b6a036c 100644 --- a/ci/valgrind-check/src/queryable_get/bin/z_queryable_get.rs +++ b/ci/valgrind-check/src/queryable_get/bin/z_queryable_get.rs @@ -34,9 +34,9 @@ async fn main() { queryable_key_expr.clone(), query.value().unwrap().clone(), )); - futures::executor::block_on(async move { - query.reply(reply).res().await.unwrap(); - }) + zenoh_runtime::ZRuntime::Application.block_in_place( + async move { query.reply(reply).res().await.unwrap(); } + ); }) .complete(true) .res() diff --git a/commons/zenoh-task/Cargo.toml b/commons/zenoh-task/Cargo.toml index 99a8bd5612..bf52f13735 100644 --- a/commons/zenoh-task/Cargo.toml +++ b/commons/zenoh-task/Cargo.toml @@ -28,5 +28,6 @@ description = "Internal crate for zenoh." tokio = { workspace = true, features = ["default", "sync"] } futures = { workspace = true } log = { workspace = true } +zenoh-core = { workspace = true } zenoh-runtime = { workspace = true } tokio-util = { workspace = true, features = ["rt"] } \ No newline at end of file diff --git a/commons/zenoh-task/src/lib.rs b/commons/zenoh-task/src/lib.rs index 54d93ffe3b..7b305cee75 100644 --- a/commons/zenoh-task/src/lib.rs +++ b/commons/zenoh-task/src/lib.rs @@ -24,6 +24,7 @@ use std::time::Duration; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tokio_util::task::TaskTracker; +use zenoh_core::{ResolveFuture, SyncResolve}; use zenoh_runtime::ZRuntime; #[derive(Clone)] @@ -102,37 +103,89 @@ impl TaskController { self.tracker.spawn_on(future.map(|_f| ()), &rt) } - /// Terminates all prevously spawned tasks + /// Attempts tp terminate all previously spawned tasks /// The caller must ensure that all tasks spawned with [`TaskController::spawn()`] /// or [`TaskController::spawn_with_rt()`] can yield in finite amount of time either because they will run to completion /// or due to cancellation of token acquired via [`TaskController::get_cancellation_token()`]. /// Tasks spawned with [`TaskController::spawn_abortable()`] or [`TaskController::spawn_abortable_with_rt()`] will be aborted (i.e. terminated upon next await call). - pub fn terminate_all(&self) { + /// The call blocks until all tasks yield or timeout duration expires. + /// Returns 0 in case of success, number of non terminated tasks otherwise. + pub fn terminate_all(&self, timeout: Duration) -> usize { + ResolveFuture::new(async move { self.terminate_all_async(timeout).await }).res_sync() + } + + /// Async version of [`TaskController::terminate_all()`]. + pub async fn terminate_all_async(&self, timeout: Duration) -> usize { self.tracker.close(); self.token.cancel(); + if tokio::time::timeout(timeout, self.tracker.wait()) + .await + .is_err() + { + log::error!("Failed to terminate {} tasks", self.tracker.len()); + return self.tracker.len(); + } + 0 + } +} + +pub struct TerminatableTask { + handle: JoinHandle<()>, + token: CancellationToken, +} + +impl TerminatableTask { + pub fn create_cancellation_token() -> CancellationToken { + CancellationToken::new() + } + + /// Spawns a task that can be later terminated by [`TerminatableTask::terminate()`]. + /// Prior to termination attempt the specified cancellation token will be cancelled. + pub fn spawn(rt: ZRuntime, future: F, token: CancellationToken) -> TerminatableTask + where + F: Future + Send + 'static, + T: Send + 'static, + { + TerminatableTask { + handle: rt.spawn(future.map(|_f| ())), + token, + } + } + + /// Spawns a task that can be later aborted by [`TerminatableTask::terminate()`]. + pub fn spawn_abortable(rt: ZRuntime, future: F) -> TerminatableTask + where + F: Future + Send + 'static, + T: Send + 'static, + { + let token = CancellationToken::new(); + let token2 = token.clone(); let task = async move { tokio::select! { - _ = tokio::time::sleep(Duration::from_secs(10)) => { - log::error!("Failed to terminate {} tasks", self.tracker.len()); - } - _ = self.tracker.wait() => {} + _ = token2.cancelled() => {}, + _ = future => {} } }; - futures::executor::block_on(task); + + TerminatableTask { + handle: rt.spawn(task), + token, + } } - /// Async version of [`TaskController::terminate_all()`]. - pub async fn terminate_all_async(&self) { - self.tracker.close(); + /// Attempts to terminate the task. + /// Returns true if task completed / aborted within timeout duration, false otherwise. + pub fn terminate(self, timeout: Duration) -> bool { + ResolveFuture::new(async move { self.terminate_async(timeout).await }).res_sync() + } + + /// Async version of [`TerminatableTask::terminate()`]. + pub async fn terminate_async(self, timeout: Duration) -> bool { self.token.cancel(); - let task = async move { - tokio::select! { - _ = tokio::time::sleep(Duration::from_secs(10)) => { - log::error!("Failed to terminate {} tasks", self.tracker.len()); - } - _ = self.tracker.wait() => {} - } + if tokio::time::timeout(timeout, self.handle).await.is_err() { + log::error!("Failed to terminate the task"); + return false; }; - task.await; + true } } diff --git a/io/zenoh-transport/src/manager.rs b/io/zenoh-transport/src/manager.rs index 32d60df30a..f97c126f8b 100644 --- a/io/zenoh-transport/src/manager.rs +++ b/io/zenoh-transport/src/manager.rs @@ -397,7 +397,9 @@ impl TransportManager { pub async fn close(&self) { self.close_unicast().await; - self.task_controller.terminate_all(); + self.task_controller + .terminate_all_async(Duration::from_secs(10)) + .await; } /*************************************/ diff --git a/io/zenoh-transport/src/multicast/transport.rs b/io/zenoh-transport/src/multicast/transport.rs index f2df1b4061..2e7f54098d 100644 --- a/io/zenoh-transport/src/multicast/transport.rs +++ b/io/zenoh-transport/src/multicast/transport.rs @@ -184,7 +184,9 @@ impl TransportMulticastInner { cb.closed(); } - self.task_controller.terminate_all_async().await; + self.task_controller + .terminate_all_async(Duration::from_secs(10)) + .await; Ok(()) } diff --git a/zenoh-ext/Cargo.toml b/zenoh-ext/Cargo.toml index 372eaf234a..6a0488cb54 100644 --- a/zenoh-ext/Cargo.toml +++ b/zenoh-ext/Cargo.toml @@ -45,6 +45,7 @@ zenoh-result = { workspace = true } zenoh-sync = { workspace = true } zenoh-util = { workspace = true } zenoh-runtime = { workspace = true } +zenoh-task = { workspace = true } [dev-dependencies] clap = { workspace = true, features = ["derive"] } diff --git a/zenoh-ext/src/publication_cache.rs b/zenoh-ext/src/publication_cache.rs index c8c5679c91..eec398d592 100644 --- a/zenoh-ext/src/publication_cache.rs +++ b/zenoh-ext/src/publication_cache.rs @@ -11,16 +11,17 @@ // Contributors: // ZettaScale Zenoh Team, // -use flume::{bounded, Sender}; use std::collections::{HashMap, VecDeque}; use std::convert::TryInto; use std::future::Ready; +use std::time::Duration; use zenoh::prelude::r#async::*; use zenoh::queryable::{Query, Queryable}; use zenoh::subscriber::FlumeSubscriber; use zenoh::SessionRef; use zenoh_core::{AsyncResolve, Resolvable, SyncResolve}; use zenoh_result::{bail, ZResult}; +use zenoh_task::TerminatableTask; use zenoh_util::core::ResolveFuture; /// The builder of PublicationCache, allowing to configure it. @@ -110,7 +111,7 @@ impl<'a> AsyncResolve for PublicationCacheBuilder<'a, '_, '_> { pub struct PublicationCache<'a> { local_sub: FlumeSubscriber<'a>, _queryable: Queryable<'a, flume::Receiver>, - _stoptx: Sender, + task: TerminatableTask, } impl<'a> PublicationCache<'a> { @@ -166,58 +167,46 @@ impl<'a> PublicationCache<'a> { let history = conf.history; // TODO(yuyuan): use CancellationToken to manage it - let (stoptx, stoprx) = bounded::(1); - zenoh_runtime::ZRuntime::TX.spawn(async move { - let mut cache: HashMap> = - HashMap::with_capacity(resources_limit.unwrap_or(32)); - let limit = resources_limit.unwrap_or(usize::MAX); + let token = TerminatableTask::create_cancellation_token(); + let token2 = token.clone(); + let task = TerminatableTask::spawn( + zenoh_runtime::ZRuntime::TX, + async move { + let mut cache: HashMap> = + HashMap::with_capacity(resources_limit.unwrap_or(32)); + let limit = resources_limit.unwrap_or(usize::MAX); + loop { + tokio::select! { + // on publication received by the local subscriber, store it + sample = sub_recv.recv_async() => { + if let Ok(sample) = sample { + let queryable_key_expr: KeyExpr<'_> = if let Some(prefix) = &queryable_prefix { + prefix.join(&sample.key_expr).unwrap().into() + } else { + sample.key_expr.clone() + }; - loop { - tokio::select! { - // on publication received by the local subscriber, store it - sample = sub_recv.recv_async() => { - if let Ok(sample) = sample { - let queryable_key_expr: KeyExpr<'_> = if let Some(prefix) = &queryable_prefix { - prefix.join(&sample.key_expr).unwrap().into() - } else { - sample.key_expr.clone() - }; - - if let Some(queue) = cache.get_mut(queryable_key_expr.as_keyexpr()) { - if queue.len() >= history { - queue.pop_front(); + if let Some(queue) = cache.get_mut(queryable_key_expr.as_keyexpr()) { + if queue.len() >= history { + queue.pop_front(); + } + queue.push_back(sample); + } else if cache.len() >= limit { + log::error!("PublicationCache on {}: resource_limit exceeded - can't cache publication for a new resource", + pub_key_expr); + } else { + let mut queue: VecDeque = VecDeque::new(); + queue.push_back(sample); + cache.insert(queryable_key_expr.into(), queue); } - queue.push_back(sample); - } else if cache.len() >= limit { - log::error!("PublicationCache on {}: resource_limit exceeded - can't cache publication for a new resource", - pub_key_expr); - } else { - let mut queue: VecDeque = VecDeque::new(); - queue.push_back(sample); - cache.insert(queryable_key_expr.into(), queue); } - } - }, + }, - // on query, reply with cach content - query = quer_recv.recv_async() => { - if let Ok(query) = query { - if !query.selector().key_expr.as_str().contains('*') { - if let Some(queue) = cache.get(query.selector().key_expr.as_keyexpr()) { - for sample in queue { - if let (Ok(Some(time_range)), Some(timestamp)) = (query.selector().time_range(), sample.timestamp) { - if !time_range.contains(timestamp.get_time().to_system_time()){ - continue; - } - } - if let Err(e) = query.reply(Ok(sample.clone())).res_async().await { - log::warn!("Error replying to query: {}", e); - } - } - } - } else { - for (key_expr, queue) in cache.iter() { - if query.selector().key_expr.intersects(unsafe{ keyexpr::from_str_unchecked(key_expr) }) { + // on query, reply with cach content + query = quer_recv.recv_async() => { + if let Ok(query) = query { + if !query.selector().key_expr.as_str().contains('*') { + if let Some(queue) = cache.get(query.selector().key_expr.as_keyexpr()) { for sample in queue { if let (Ok(Some(time_range)), Some(timestamp)) = (query.selector().time_range(), sample.timestamp) { if !time_range.contains(timestamp.get_time().to_system_time()){ @@ -229,21 +218,35 @@ impl<'a> PublicationCache<'a> { } } } + } else { + for (key_expr, queue) in cache.iter() { + if query.selector().key_expr.intersects(unsafe{ keyexpr::from_str_unchecked(key_expr) }) { + for sample in queue { + if let (Ok(Some(time_range)), Some(timestamp)) = (query.selector().time_range(), sample.timestamp) { + if !time_range.contains(timestamp.get_time().to_system_time()){ + continue; + } + } + if let Err(e) = query.reply(Ok(sample.clone())).res_async().await { + log::warn!("Error replying to query: {}", e); + } + } + } + } } } - } - }, - - // When stoptx is dropped, stop the task - _ = stoprx.recv_async() => return + }, + _ = token2.cancelled() => return + } } - } - }); + }, + token, + ); Ok(PublicationCache { local_sub, _queryable: queryable, - _stoptx: stoptx, + task, }) } @@ -254,11 +257,11 @@ impl<'a> PublicationCache<'a> { let PublicationCache { _queryable, local_sub, - _stoptx, + task, } = self; _queryable.undeclare().res_async().await?; local_sub.undeclare().res_async().await?; - drop(_stoptx); + task.terminate(Duration::from_secs(10)); Ok(()) }) } diff --git a/zenoh/src/net/routing/dispatcher/face.rs b/zenoh/src/net/routing/dispatcher/face.rs index 94f66ded8d..3d7d850e6b 100644 --- a/zenoh/src/net/routing/dispatcher/face.rs +++ b/zenoh/src/net/routing/dispatcher/face.rs @@ -27,6 +27,7 @@ use zenoh_protocol::{ network::{Mapping, Push, Request, RequestId, Response, ResponseFinal}, }; use zenoh_sync::get_mut_unchecked; +use zenoh_task::TaskController; use zenoh_transport::multicast::TransportMulticast; #[cfg(feature = "stats")] use zenoh_transport::stats::TransportStats; @@ -45,6 +46,7 @@ pub struct FaceState { pub(crate) mcast_group: Option, pub(crate) in_interceptors: Option>, pub(crate) hat: Box, + pub(crate) task_controller: TaskController, } impl FaceState { @@ -73,6 +75,7 @@ impl FaceState { mcast_group, in_interceptors, hat, + task_controller: TaskController::default(), }) } diff --git a/zenoh/src/net/routing/dispatcher/queries.rs b/zenoh/src/net/routing/dispatcher/queries.rs index 3105c6195f..01c681e0d6 100644 --- a/zenoh/src/net/routing/dispatcher/queries.rs +++ b/zenoh/src/net/routing/dispatcher/queries.rs @@ -599,10 +599,16 @@ pub fn route_query( face: Arc::downgrade(outface), qid: *qid, }; - zenoh_runtime::ZRuntime::Net.spawn(async move { - tokio::time::sleep(timeout).await; - cleanup.run().await - }); + let cancellation_token = face.task_controller.get_cancellation_token(); + face.task_controller.spawn_with_rt( + zenoh_runtime::ZRuntime::Net, + async move { + tokio::select! { + _ = tokio::time::sleep(timeout) => { cleanup.run().await } + _ = cancellation_token.cancelled() => {} + } + }, + ); #[cfg(feature = "stats")] if !admin { inc_req_stats!(outface, tx, user, body) @@ -636,10 +642,16 @@ pub fn route_query( face: Arc::downgrade(outface), qid: *qid, }; - zenoh_runtime::ZRuntime::Net.spawn(async move { - tokio::time::sleep(timeout).await; - cleanup.run().await - }); + let cancellation_token = face.task_controller.get_cancellation_token(); + face.task_controller.spawn_with_rt( + zenoh_runtime::ZRuntime::Net, + async move { + tokio::select! { + _ = tokio::time::sleep(timeout) => { cleanup.run().await } + _ = cancellation_token.cancelled() => {} + } + }, + ); #[cfg(feature = "stats")] if !admin { inc_req_stats!(outface, tx, user, body) diff --git a/zenoh/src/net/routing/dispatcher/tables.rs b/zenoh/src/net/routing/dispatcher/tables.rs index 73338cc79d..10605b25b1 100644 --- a/zenoh/src/net/routing/dispatcher/tables.rs +++ b/zenoh/src/net/routing/dispatcher/tables.rs @@ -172,6 +172,7 @@ pub fn close_face(tables: &TablesLock, face: &Weak) { match face.upgrade() { Some(mut face) => { log::debug!("Close {}", face); + face.task_controller.terminate_all(Duration::from_secs(10)); finalize_pending_queries(tables, &mut face); zlock!(tables.ctrl_lock).close_face(tables, &mut face); } diff --git a/zenoh/src/net/routing/hat/linkstate_peer/mod.rs b/zenoh/src/net/routing/hat/linkstate_peer/mod.rs index b7a3cfa448..35afaf30d7 100644 --- a/zenoh/src/net/routing/hat/linkstate_peer/mod.rs +++ b/zenoh/src/net/routing/hat/linkstate_peer/mod.rs @@ -47,16 +47,16 @@ use std::{ any::Any, collections::{HashMap, HashSet}, sync::Arc, + time::Duration, }; -use tokio::task::JoinHandle; use zenoh_config::{unwrap_or_default, ModeDependent, WhatAmI, WhatAmIMatcher, ZenohId}; -use zenoh_core::{ResolveFuture, SyncResolve}; use zenoh_protocol::{ common::ZExtBody, network::{declare::queryable::ext::QueryableInfo, oam::id::OAM_LINKSTATE, Oam}, }; use zenoh_result::ZResult; use zenoh_sync::get_mut_unchecked; +use zenoh_task::TerminatableTask; use zenoh_transport::unicast::TransportUnicast; mod network; @@ -113,17 +113,14 @@ struct HatTables { peer_subs: HashSet>, peer_qabls: HashSet>, peers_net: Option, - peers_trees_task: Option>, + peers_trees_task: Option, } impl Drop for HatTables { fn drop(&mut self) { if self.peers_trees_task.is_some() { let task = self.peers_trees_task.take().unwrap(); - ResolveFuture::new(async move { - let _ = task.await; - }) - .res_sync(); + task.terminate(Duration::from_secs(10)); } } } @@ -141,24 +138,28 @@ impl HatTables { fn schedule_compute_trees(&mut self, tables_ref: Arc) { log::trace!("Schedule computations"); if self.peers_trees_task.is_none() { - let task = Some(zenoh_runtime::ZRuntime::Net.spawn(async move { - tokio::time::sleep(std::time::Duration::from_millis( - *TREES_COMPUTATION_DELAY_MS, - )) - .await; - let mut tables = zwrite!(tables_ref.tables); - - log::trace!("Compute trees"); - let new_childs = hat_mut!(tables).peers_net.as_mut().unwrap().compute_trees(); - - log::trace!("Compute routes"); - pubsub::pubsub_tree_change(&mut tables, &new_childs); - queries::queries_tree_change(&mut tables, &new_childs); - - log::trace!("Computations completed"); - hat_mut!(tables).peers_trees_task = None; - })); - self.peers_trees_task = task; + let task = TerminatableTask::spawn( + zenoh_runtime::ZRuntime::Net, + async move { + tokio::time::sleep(std::time::Duration::from_millis( + *TREES_COMPUTATION_DELAY_MS, + )) + .await; + let mut tables = zwrite!(tables_ref.tables); + + log::trace!("Compute trees"); + let new_childs = hat_mut!(tables).peers_net.as_mut().unwrap().compute_trees(); + + log::trace!("Compute routes"); + pubsub::pubsub_tree_change(&mut tables, &new_childs); + queries::queries_tree_change(&mut tables, &new_childs); + + log::trace!("Computations completed"); + hat_mut!(tables).peers_trees_task = None; + }, + TerminatableTask::create_cancellation_token(), + ); + self.peers_trees_task = Some(task); } } } diff --git a/zenoh/src/net/routing/hat/router/mod.rs b/zenoh/src/net/routing/hat/router/mod.rs index a933f41060..030b8da4b4 100644 --- a/zenoh/src/net/routing/hat/router/mod.rs +++ b/zenoh/src/net/routing/hat/router/mod.rs @@ -52,16 +52,16 @@ use std::{ collections::{hash_map::DefaultHasher, HashMap, HashSet}, hash::Hasher, sync::Arc, + time::Duration, }; -use tokio::task::JoinHandle; use zenoh_config::{unwrap_or_default, ModeDependent, WhatAmI, WhatAmIMatcher, ZenohId}; -use zenoh_core::{ResolveFuture, SyncResolve}; use zenoh_protocol::{ common::ZExtBody, network::{declare::queryable::ext::QueryableInfo, oam::id::OAM_LINKSTATE, Oam}, }; use zenoh_result::ZResult; use zenoh_sync::get_mut_unchecked; +use zenoh_task::TerminatableTask; use zenoh_transport::unicast::TransportUnicast; mod network; @@ -122,8 +122,8 @@ struct HatTables { routers_net: Option, peers_net: Option, shared_nodes: Vec, - routers_trees_task: Option>, - peers_trees_task: Option>, + routers_trees_task: Option, + peers_trees_task: Option, router_peers_failover_brokering: bool, } @@ -131,17 +131,11 @@ impl Drop for HatTables { fn drop(&mut self) { if self.peers_trees_task.is_some() { let task = self.peers_trees_task.take().unwrap(); - ResolveFuture::new(async move { - let _ = task.await; - }) - .res_sync(); + task.terminate(Duration::from_secs(10)); } if self.routers_trees_task.is_some() { let task = self.routers_trees_task.take().unwrap(); - ResolveFuture::new(async move { - let _ = task.await; - }) - .res_sync(); + task.terminate(Duration::from_secs(10)); } } } @@ -263,36 +257,40 @@ impl HatTables { if (net_type == WhatAmI::Router && self.routers_trees_task.is_none()) || (net_type == WhatAmI::Peer && self.peers_trees_task.is_none()) { - let task = Some(zenoh_runtime::ZRuntime::Net.spawn(async move { - tokio::time::sleep(std::time::Duration::from_millis( - *TREES_COMPUTATION_DELAY_MS, - )) - .await; - let mut tables = zwrite!(tables_ref.tables); - - log::trace!("Compute trees"); - let new_childs = match net_type { - WhatAmI::Router => hat_mut!(tables) - .routers_net - .as_mut() - .unwrap() - .compute_trees(), - _ => hat_mut!(tables).peers_net.as_mut().unwrap().compute_trees(), - }; + let task = TerminatableTask::spawn( + zenoh_runtime::ZRuntime::Net, + async move { + tokio::time::sleep(std::time::Duration::from_millis( + *TREES_COMPUTATION_DELAY_MS, + )) + .await; + let mut tables = zwrite!(tables_ref.tables); + + log::trace!("Compute trees"); + let new_childs = match net_type { + WhatAmI::Router => hat_mut!(tables) + .routers_net + .as_mut() + .unwrap() + .compute_trees(), + _ => hat_mut!(tables).peers_net.as_mut().unwrap().compute_trees(), + }; - log::trace!("Compute routes"); - pubsub::pubsub_tree_change(&mut tables, &new_childs, net_type); - queries::queries_tree_change(&mut tables, &new_childs, net_type); + log::trace!("Compute routes"); + pubsub::pubsub_tree_change(&mut tables, &new_childs, net_type); + queries::queries_tree_change(&mut tables, &new_childs, net_type); - log::trace!("Computations completed"); - match net_type { - WhatAmI::Router => hat_mut!(tables).routers_trees_task = None, - _ => hat_mut!(tables).peers_trees_task = None, - }; - })); + log::trace!("Computations completed"); + match net_type { + WhatAmI::Router => hat_mut!(tables).routers_trees_task = None, + _ => hat_mut!(tables).peers_trees_task = None, + }; + }, + TerminatableTask::create_cancellation_token(), + ); match net_type { - WhatAmI::Router => self.routers_trees_task = task, - _ => self.peers_trees_task = task, + WhatAmI::Router => self.routers_trees_task = Some(task), + _ => self.peers_trees_task = Some(task), }; } } diff --git a/zenoh/src/net/runtime/mod.rs b/zenoh/src/net/runtime/mod.rs index 878ed27f40..9314186b2e 100644 --- a/zenoh/src/net/runtime/mod.rs +++ b/zenoh/src/net/runtime/mod.rs @@ -30,6 +30,7 @@ use futures::stream::StreamExt; use futures::Future; use std::any::Any; use std::sync::{Arc, Weak}; +use std::time::Duration; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use uhlc::{HLCBuilder, HLC}; @@ -178,7 +179,9 @@ impl Runtime { pub async fn close(&self) -> ZResult<()> { log::trace!("Runtime::close())"); // TODO: Check this whether is able to terminate all spawned task by Runtime::spawn - self.state.task_controller.terminate_all(); + self.state + .task_controller + .terminate_all(Duration::from_secs(10)); self.manager().close().await; // clean up to break cyclic reference of self.state to itself self.state.transport_handlers.write().unwrap().clear(); diff --git a/zenoh/src/net/tests/tables.rs b/zenoh/src/net/tests/tables.rs index e8b6f6ac9f..1b02a5964f 100644 --- a/zenoh/src/net/tests/tables.rs +++ b/zenoh/src/net/tests/tables.rs @@ -166,8 +166,8 @@ fn match_test() { } } -#[test] -fn clean_test() { +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +async fn clean_test() { let config = Config::default(); let router = Router::new( ZenohId::try_from([1]).unwrap(), diff --git a/zenoh/src/session.rs b/zenoh/src/session.rs index 0456a070cb..7cd495d378 100644 --- a/zenoh/src/session.rs +++ b/zenoh/src/session.rs @@ -521,7 +521,7 @@ impl Session { pub fn close(mut self) -> impl Resolve> { ResolveFuture::new(async move { trace!("close()"); - self.task_controller.terminate_all(); + self.task_controller.terminate_all(Duration::from_secs(10)); self.runtime.close().await?; let mut state = zwrite!(self.state); @@ -1768,25 +1768,30 @@ impl Session { _ => 1, }; + let token = self.task_controller.get_cancellation_token(); self.task_controller .spawn_with_rt(zenoh_runtime::ZRuntime::Net, { let state = self.state.clone(); let zid = self.runtime.zid(); async move { - tokio::time::sleep(timeout).await; - let mut state = zwrite!(state); - if let Some(query) = state.queries.remove(&qid) { - std::mem::drop(state); - log::debug!("Timeout on query {}! Send error and close.", qid); - if query.reception_mode == ConsolidationMode::Latest { - for (_, reply) in query.replies.unwrap().into_iter() { - (query.callback)(reply); + tokio::select! { + _ = tokio::time::sleep(timeout) => { + let mut state = zwrite!(state); + if let Some(query) = state.queries.remove(&qid) { + std::mem::drop(state); + log::debug!("Timeout on query {}! Send error and close.", qid); + if query.reception_mode == ConsolidationMode::Latest { + for (_, reply) in query.replies.unwrap().into_iter() { + (query.callback)(reply); + } + } + (query.callback)(Reply { + sample: Err("Timeout".into()), + replier_id: zid, + }); } } - (query.callback)(Reply { - sample: Err("Timeout".into()), - replier_id: zid, - }); + _ = token.cancelled() => {} } } }); diff --git a/zenoh/tests/routing.rs b/zenoh/tests/routing.rs index 38ed8ff087..6c5afe0673 100644 --- a/zenoh/tests/routing.rs +++ b/zenoh/tests/routing.rs @@ -27,7 +27,7 @@ use zenoh_core::ztimeout; use zenoh_protocol::core::{WhatAmI, WhatAmIMatcher}; use zenoh_result::bail; -const TIMEOUT: Duration = Duration::from_secs(60); +const TIMEOUT: Duration = Duration::from_secs(10); const MSG_COUNT: usize = 50; const MSG_SIZE: [usize; 2] = [1_024, 131_072]; // Maximal recipes to run at once From 8520a245c2fa98ae8ce86a679aac96a41dc8f058 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Wed, 27 Mar 2024 13:49:19 +0100 Subject: [PATCH 22/38] terminate scouting task --- zenoh/src/scouting.rs | 54 ++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/zenoh/src/scouting.rs b/zenoh/src/scouting.rs index ab5866e388..cd25393754 100644 --- a/zenoh/src/scouting.rs +++ b/zenoh/src/scouting.rs @@ -14,12 +14,13 @@ use crate::handlers::{locked, Callback, DefaultHandler}; use crate::net::runtime::{orchestrator::Loop, Runtime}; -use futures::StreamExt; +use std::time::Duration; use std::{fmt, future::Ready, net::SocketAddr, ops::Deref}; use tokio::net::UdpSocket; use zenoh_core::{AsyncResolve, Resolvable, SyncResolve}; use zenoh_protocol::core::WhatAmIMatcher; use zenoh_result::ZResult; +use zenoh_task::TerminatableTask; /// Constants and helpers for zenoh `whatami` flags. pub use zenoh_protocol::core::WhatAmI; @@ -204,7 +205,7 @@ where /// ``` pub(crate) struct ScoutInner { #[allow(dead_code)] - pub(crate) stop_sender: flume::Sender<()>, + pub(crate) scout_task: Option, } impl ScoutInner { @@ -226,11 +227,19 @@ impl ScoutInner { /// # } /// ``` pub fn stop(self) { - // This drops the inner `stop_sender` and hence stops the scouting receiver std::mem::drop(self); } } +impl Drop for ScoutInner { + fn drop(&mut self) { + if self.scout_task.is_some() { + let task = self.scout_task.take(); + task.unwrap().terminate(Duration::from_secs(10)); + } + } +} + impl fmt::Debug for ScoutInner { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("CallbackScout").finish() @@ -307,7 +316,6 @@ fn scout( zenoh_config::defaults::scouting::multicast::interface, |s| s.as_ref(), ); - let (stop_sender, stop_receiver) = flume::bounded::<()>(1); let ifaces = Runtime::get_interfaces(ifaces); if !ifaces.is_empty() { let sockets: Vec = ifaces @@ -315,25 +323,29 @@ fn scout( .filter_map(|iface| Runtime::bind_ucast_port(iface).ok()) .collect(); if !sockets.is_empty() { - zenoh_runtime::ZRuntime::Net.spawn(async move { - let mut stop_receiver = stop_receiver.stream(); - let scout = Runtime::scout(&sockets, what, &addr, move |hello| { - let callback = callback.clone(); - async move { - callback(hello); - Loop::Continue + let cancellation_token = TerminatableTask::create_cancellation_token(); + let cancellation_token_clone = cancellation_token.clone(); + let task = TerminatableTask::spawn( + zenoh_runtime::ZRuntime::Net, + async move { + let scout = Runtime::scout(&sockets, what, &addr, move |hello| { + let callback = callback.clone(); + async move { + callback(hello); + Loop::Continue + } + }); + tokio::select! { + _ = scout => {}, + _ = cancellation_token_clone.cancelled() => { log::trace!("stop scout({}, {})", what, &config); }, } - }); - let stop = async move { - stop_receiver.next().await; - log::trace!("stop scout({}, {})", what, &config); - }; - tokio::select! { - _ = scout => {}, - _ = stop => {}, - } + }, + cancellation_token.clone(), + ); + return Ok(ScoutInner { + scout_task: Some(task), }); } } - Ok(ScoutInner { stop_sender }) + Ok(ScoutInner { scout_task: None }) } From 12584fa0a2ac9094c0d5ac5c459617586865838f Mon Sep 17 00:00:00 2001 From: Mahmoud Mazouz Date: Fri, 29 Mar 2024 15:18:15 +0100 Subject: [PATCH 23/38] Squash merge sync-lockfile prs, provide zenoh HEAD hash & date (#876) * ci: Squash merge sync-lockfile prs, provide zenoh HEAD hash & date * fix: Add # before pull request number * fix: Set zenoh HEAD info in title --- .github/workflows/sync-lockfiles.yml | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/.github/workflows/sync-lockfiles.yml b/.github/workflows/sync-lockfiles.yml index 581deeb5c8..5240ab403f 100644 --- a/.github/workflows/sync-lockfiles.yml +++ b/.github/workflows/sync-lockfiles.yml @@ -18,6 +18,9 @@ jobs: fetch: name: Fetch Zenoh's lockfile runs-on: ubuntu-latest + outputs: + zenoh-head-hash: ${{ steps.info.outputs.head-hash }} + zenoh-head-date: ${{ steps.info.outputs.head-date }} steps: - name: Checkout Zenoh uses: actions/checkout@v4 @@ -25,6 +28,12 @@ jobs: repository: eclipse-zenoh/zenoh ref: ${{ inputs.branch }} + - id: info + name: Get HEAD info + run: | + echo "head-hash=$(git log -1 --format=%h)" >> $GITHUB_OUTPUT + echo "head-date=$(git log -1 --format=%ad)" >> $GITHUB_OUTPUT + - name: Upload lockfile uses: actions/upload-artifact@v3 with: @@ -94,8 +103,14 @@ jobs: # NOTE: If there is a pending PR, this action will simply update it with a forced push. uses: peter-evans/create-pull-request@v6 with: - title: Sync lockfile with Zenoh's - body: Automated synchronization of the Cargo lockfile with Zenoh. This is done to ensure plugin ABI compatibility. + title: Sync `Cargo.lock` with `eclipse-zenoh/zenoh@${{ needs.fetch.outputs.zenoh-head-hash }}` from `${{ needs.fetch.outputs.zenoh-head-date }}`" + body: > + This pull request synchronizes ${{ matrix.dependant }}'s Cargo lockfile with zenoh's. + This is done to ensure ABI compatibility between Zenoh applications, backends & plugins. + + - **Zenoh HEAD hash**: eclipse-zenoh/zenoh@${{ needs.fetch.outputs.zenoh-head-hash }} + - **Zenoh HEAD date**: ${{ needs.fetch.outputs.zenoh-head-date }} + - **Workflow run**: [${{ github.run_id }}](https://github.com/eclipse-zenoh/zenoh/actions/runs/${{ github.run_id }}) commit-message: "chore: Sync Cargo lockfile with Zenoh's" committer: eclipse-zenoh-bot author: eclipse-zenoh-bot @@ -107,6 +122,10 @@ jobs: - name: Enable auto merge for the pull request if: steps.cpr.outputs.pull-request-operation == 'created' - run: gh pr merge -R "eclipse-zenoh/${{ matrix.dependant }}" --merge --auto "${{ steps.cpr.outputs.pull-request-number }}" + run: > + gh pr merge "${{ steps.cpr.outputs.pull-request-number }}" + --repo "eclipse-zenoh/${{ matrix.dependant }}" + --squash + --auto env: GH_TOKEN: ${{ secrets.BOT_TOKEN_WORKFLOW }} From 9449c31c124a16ed86762e1fab4ffcf2f5103edc Mon Sep 17 00:00:00 2001 From: DenisBiryukov91 <155981813+DenisBiryukov91@users.noreply.github.com> Date: Fri, 29 Mar 2024 16:54:04 +0100 Subject: [PATCH 24/38] Correct generic name IntoSelector to TryIntoSelector in Session.get (#879) --- zenoh/src/session.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/zenoh/src/session.rs b/zenoh/src/session.rs index 7cd495d378..5d508db815 100644 --- a/zenoh/src/session.rs +++ b/zenoh/src/session.rs @@ -774,13 +774,13 @@ impl Session { /// } /// # } /// ``` - pub fn get<'a, 'b: 'a, IntoSelector>( + pub fn get<'a, 'b: 'a, TryIntoSelector>( &'a self, - selector: IntoSelector, + selector: TryIntoSelector, ) -> GetBuilder<'a, 'b, DefaultHandler> where - IntoSelector: TryInto>, - >>::Error: Into, + TryIntoSelector: TryInto>, + >>::Error: Into, { let selector = selector.try_into().map_err(Into::into); let timeout = { From 05b9cb459f77693bbf7c89d67265ba1519959814 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Fri, 29 Mar 2024 17:02:23 +0100 Subject: [PATCH 25/38] Fix cargo clippy (#880) --- Cargo.lock | 16 ++++++++++++++++ commons/zenoh-macros/build.rs | 1 + zenoh/tests/connection_retry.rs | 3 +-- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4ad3f6f7e7..b331a798b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4415,6 +4415,7 @@ dependencies = [ "zenoh-runtime", "zenoh-shm", "zenoh-sync", + "zenoh-task", "zenoh-transport", "zenoh-util", ] @@ -4543,6 +4544,7 @@ dependencies = [ "zenoh-result", "zenoh-runtime", "zenoh-sync", + "zenoh-task", "zenoh-util", ] @@ -4925,6 +4927,7 @@ dependencies = [ name = "zenoh-runtime" version = "0.11.0-dev" dependencies = [ + "futures", "lazy_static", "tokio", "zenoh-collections", @@ -4956,6 +4959,18 @@ dependencies = [ "zenoh-runtime", ] +[[package]] +name = "zenoh-task" +version = "0.11.0-dev" +dependencies = [ + "futures", + "log", + "tokio", + "tokio-util", + "zenoh-core", + "zenoh-runtime", +] + [[package]] name = "zenoh-transport" version = "0.11.0-dev" @@ -4987,6 +5002,7 @@ dependencies = [ "zenoh-runtime", "zenoh-shm", "zenoh-sync", + "zenoh-task", "zenoh-util", ] diff --git a/commons/zenoh-macros/build.rs b/commons/zenoh-macros/build.rs index 557593d00e..d5ce6632dc 100644 --- a/commons/zenoh-macros/build.rs +++ b/commons/zenoh-macros/build.rs @@ -23,6 +23,7 @@ fn main() { let version_rs = std::path::PathBuf::from(env::var_os("OUT_DIR").unwrap()).join("version.rs"); let mut version_rs = OpenOptions::new() .create(true) + .truncate(true) .write(true) .open(version_rs) .unwrap(); diff --git a/zenoh/tests/connection_retry.rs b/zenoh/tests/connection_retry.rs index db84d7bd5d..fcb071b489 100644 --- a/zenoh/tests/connection_retry.rs +++ b/zenoh/tests/connection_retry.rs @@ -36,8 +36,7 @@ fn retry_config_overriding() { .insert_json5("listen/exit_on_failure", "false") .unwrap(); - let expected = vec![ - // global value + let expected = [ ConnectionRetryConf { period_init_ms: 3000, period_max_ms: 6000, From 51f4f1ca083a8c4431f0bfa724f075de8ffb8723 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Mon, 1 Apr 2024 11:02:30 +0200 Subject: [PATCH 26/38] Review tokio runtime being used (#875) * Review tokio runtime being used * Fix cargo clippy --- io/zenoh-links/zenoh-link-tcp/src/unicast.rs | 2 +- io/zenoh-links/zenoh-link-tls/src/unicast.rs | 4 ++-- io/zenoh-links/zenoh-link-unixsock_stream/src/unicast.rs | 2 +- io/zenoh-links/zenoh-link-ws/src/unicast.rs | 2 +- io/zenoh-transport/src/manager.rs | 4 ++-- io/zenoh-transport/src/unicast/universal/link.rs | 2 +- zenoh-ext/src/publication_cache.rs | 2 +- zenoh/src/net/routing/hat/p2p_peer/gossip.rs | 2 +- zenoh/src/scouting.rs | 2 +- zenoh/src/session.rs | 4 ++-- 10 files changed, 13 insertions(+), 13 deletions(-) diff --git a/io/zenoh-links/zenoh-link-tcp/src/unicast.rs b/io/zenoh-links/zenoh-link-tcp/src/unicast.rs index 7137ac0212..361f4fe69e 100644 --- a/io/zenoh-links/zenoh-link-tcp/src/unicast.rs +++ b/io/zenoh-links/zenoh-link-tcp/src/unicast.rs @@ -171,7 +171,7 @@ impl LinkUnicastTrait for LinkUnicastTcp { // impl Drop for LinkUnicastTcp { // fn drop(&mut self) { // // Close the underlying TCP socket -// zenoh_runtime::ZRuntime::TX.block_in_place(async { +// zenoh_runtime::ZRuntime::Acceptor.block_in_place(async { // let _ = self.get_mut_socket().shutdown().await; // }); // } diff --git a/io/zenoh-links/zenoh-link-tls/src/unicast.rs b/io/zenoh-links/zenoh-link-tls/src/unicast.rs index 7da711161e..b24ce4ac31 100644 --- a/io/zenoh-links/zenoh-link-tls/src/unicast.rs +++ b/io/zenoh-links/zenoh-link-tls/src/unicast.rs @@ -204,8 +204,8 @@ impl Drop for LinkUnicastTls { fn drop(&mut self) { // Close the underlying TCP stream let (tcp_stream, _) = self.get_sock_mut().get_mut(); - let _ = - zenoh_runtime::ZRuntime::TX.block_in_place(async move { tcp_stream.shutdown().await }); + let _ = zenoh_runtime::ZRuntime::Acceptor + .block_in_place(async move { tcp_stream.shutdown().await }); } } diff --git a/io/zenoh-links/zenoh-link-unixsock_stream/src/unicast.rs b/io/zenoh-links/zenoh-link-unixsock_stream/src/unicast.rs index 53441ab89c..b85cee9c66 100644 --- a/io/zenoh-links/zenoh-link-unixsock_stream/src/unicast.rs +++ b/io/zenoh-links/zenoh-link-unixsock_stream/src/unicast.rs @@ -144,7 +144,7 @@ impl LinkUnicastTrait for LinkUnicastUnixSocketStream { impl Drop for LinkUnicastUnixSocketStream { fn drop(&mut self) { // Close the underlying UnixSocketStream socket - let _ = zenoh_runtime::ZRuntime::TX + let _ = zenoh_runtime::ZRuntime::Acceptor .block_in_place(async move { self.get_mut_socket().shutdown().await }); } } diff --git a/io/zenoh-links/zenoh-link-ws/src/unicast.rs b/io/zenoh-links/zenoh-link-ws/src/unicast.rs index 6a0cf64e6e..1a6d0fecf3 100644 --- a/io/zenoh-links/zenoh-link-ws/src/unicast.rs +++ b/io/zenoh-links/zenoh-link-ws/src/unicast.rs @@ -224,7 +224,7 @@ impl LinkUnicastTrait for LinkUnicastWs { impl Drop for LinkUnicastWs { fn drop(&mut self) { - zenoh_runtime::ZRuntime::TX.block_in_place(async { + zenoh_runtime::ZRuntime::Acceptor.block_in_place(async { let mut guard = zasynclock!(self.send); // Close the underlying TCP socket guard.close().await.unwrap_or_else(|e| { diff --git a/io/zenoh-transport/src/manager.rs b/io/zenoh-transport/src/manager.rs index f97c126f8b..a52a35af83 100644 --- a/io/zenoh-transport/src/manager.rs +++ b/io/zenoh-transport/src/manager.rs @@ -438,8 +438,8 @@ impl TransportManager { // TODO(yuyuan): Can we make this async as above? pub fn get_locators(&self) -> Vec { - let mut lsu = zenoh_runtime::ZRuntime::TX.block_in_place(self.get_locators_unicast()); - let mut lsm = zenoh_runtime::ZRuntime::TX.block_in_place(self.get_locators_multicast()); + let mut lsu = zenoh_runtime::ZRuntime::Net.block_in_place(self.get_locators_unicast()); + let mut lsm = zenoh_runtime::ZRuntime::Net.block_in_place(self.get_locators_multicast()); lsu.append(&mut lsm); lsu } diff --git a/io/zenoh-transport/src/unicast/universal/link.rs b/io/zenoh-transport/src/unicast/universal/link.rs index 93a6c717dd..6e86a59aa7 100644 --- a/io/zenoh-transport/src/unicast/universal/link.rs +++ b/io/zenoh-transport/src/unicast/universal/link.rs @@ -102,7 +102,7 @@ impl TransportLinkUnicastUniversal { // to finish in the close() joining its handle // TODO(yuyuan): do more study to check which ZRuntime should be used or refine the // termination - zenoh_runtime::ZRuntime::TX + zenoh_runtime::ZRuntime::Net .spawn(async move { transport.del_link(tx.inner.link()).await }); } }; diff --git a/zenoh-ext/src/publication_cache.rs b/zenoh-ext/src/publication_cache.rs index eec398d592..aede6a2ee4 100644 --- a/zenoh-ext/src/publication_cache.rs +++ b/zenoh-ext/src/publication_cache.rs @@ -170,7 +170,7 @@ impl<'a> PublicationCache<'a> { let token = TerminatableTask::create_cancellation_token(); let token2 = token.clone(); let task = TerminatableTask::spawn( - zenoh_runtime::ZRuntime::TX, + zenoh_runtime::ZRuntime::Application, async move { let mut cache: HashMap> = HashMap::with_capacity(resources_limit.unwrap_or(32)); diff --git a/zenoh/src/net/routing/hat/p2p_peer/gossip.rs b/zenoh/src/net/routing/hat/p2p_peer/gossip.rs index f651ccdc0d..bbe7bd9024 100644 --- a/zenoh/src/net/routing/hat/p2p_peer/gossip.rs +++ b/zenoh/src/net/routing/hat/p2p_peer/gossip.rs @@ -408,7 +408,7 @@ impl Network { if !self.autoconnect.is_empty() { // Connect discovered peers - if zenoh_runtime::ZRuntime::Net + if zenoh_runtime::ZRuntime::Acceptor .block_in_place(strong_runtime.manager().get_transport_unicast(&zid)) .is_none() && self.autoconnect.matches(whatami) diff --git a/zenoh/src/scouting.rs b/zenoh/src/scouting.rs index cd25393754..f2c90123ce 100644 --- a/zenoh/src/scouting.rs +++ b/zenoh/src/scouting.rs @@ -326,7 +326,7 @@ fn scout( let cancellation_token = TerminatableTask::create_cancellation_token(); let cancellation_token_clone = cancellation_token.clone(); let task = TerminatableTask::spawn( - zenoh_runtime::ZRuntime::Net, + zenoh_runtime::ZRuntime::Acceptor, async move { let scout = Runtime::scout(&sockets, what, &addr, move |hello| { let callback = callback.clone(); diff --git a/zenoh/src/session.rs b/zenoh/src/session.rs index 5d508db815..e68c3ed955 100644 --- a/zenoh/src/session.rs +++ b/zenoh/src/session.rs @@ -1508,7 +1508,7 @@ impl Session { // Cannot hold session lock when calling tables (matching_status()) // TODO: check which ZRuntime should be used self.task_controller - .spawn_with_rt(zenoh_runtime::ZRuntime::RX, { + .spawn_with_rt(zenoh_runtime::ZRuntime::Net, { let session = self.clone(); let msub = msub.clone(); async move { @@ -1546,7 +1546,7 @@ impl Session { // Cannot hold session lock when calling tables (matching_status()) // TODO: check which ZRuntime should be used self.task_controller - .spawn_with_rt(zenoh_runtime::ZRuntime::RX, { + .spawn_with_rt(zenoh_runtime::ZRuntime::Net, { let session = self.clone(); let msub = msub.clone(); async move { From 02d4783042d50110ccfb79aada420ff5231ae840 Mon Sep 17 00:00:00 2001 From: ayhon <43295942+ayhon@users.noreply.github.com> Date: Tue, 2 Apr 2024 09:49:49 +0200 Subject: [PATCH 27/38] feat: use GitHub's markdown alerts for warnings (#883) --- README.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5bce835c29..fb268141a5 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,8 @@ Then you can start run `zenohd`. ------------------------------- ## How to build it -> :warning: **WARNING** :warning: : Zenoh and its ecosystem are under active development. When you build from git, make sure you also build from git any other Zenoh repository you plan to use (e.g. binding, plugin, backend, etc.). It may happen that some changes in git are not compatible with the most recent packaged Zenoh release (e.g. deb, docker, pip). We put particular effort in mantaining compatibility between the various git repositories in the Zenoh project. +> [!WARNING] +> Zenoh and its ecosystem are under active development. When you build from git, make sure you also build from git any other Zenoh repository you plan to use (e.g. binding, plugin, backend, etc.). It may happen that some changes in git are not compatible with the most recent packaged Zenoh release (e.g. deb, docker, pip). We put particular effort in mantaining compatibility between the various git repositories in the Zenoh project. Install [Cargo and Rust](https://doc.rust-lang.org/cargo/getting-started/installation.html). Zenoh can be succesfully compiled with Rust stable (>= 1.71.0), so no special configuration is required from your side. If you already have the Rust toolchain installed, make sure it is up-to-date with: @@ -94,6 +95,7 @@ Zenoh's router is built as `target/release/zenohd`. All the examples are built i **Routed tests:** +> [!NOTE] > **Windows users**: to properly execute the commands below in PowerShell you need to escape `"` characters as `\"`. - **put/store/get** @@ -168,19 +170,22 @@ See other examples of Zenoh usage in [examples/](examples) If not specified, the REST plugin will be active on any interface (`[::]`) and port `8000`. -> :warning: **WARNING** :warning: : The following documentation pertains to the v0.6+ API, which comes many changes to the behaviour and configuration of Zenoh. +> [!WARNING] +> The following documentation pertains to the v0.6+ API, which comes many changes to the behaviour and configuration of Zenoh. To access the v0.5 version of the code and matching README, please go to the [0.5.0-beta.9](https://github.com/eclipse-zenoh/zenoh/tree/0.5.0-beta.9) tagged version. ------------------------------- ## Plugins -> :warning: **WARNING** :warning: : As Rust doesn't have a stable ABI, the plugins should be +> [!WARNING] +> As Rust doesn't have a stable ABI, the plugins should be built with the exact same Rust version than `zenohd`, and using for `zenoh` dependency the same version (or commit number) than 'zenohd'. Otherwise, incompatibilities in memory mapping of shared types between `zenohd` and the library can lead to a `"SIGSEV"` crash. By default the Zenoh router is delivered or built with 2 plugins. These may be configured through a configuration file, or through individual changes to the configuration via the `--cfg` CLI option or via zenoh puts on individual parts of the configuration. -> :warning: **WARNING** :warning: : since `v0.6`, `zenohd` no longer loads every available plugin at startup. Instead, only configured plugins are loaded (after processing `--cfg` and `--plugin` options). Once `zenohd` is running, plugins can be hot-loaded and, if they support it, reconfigured at runtime by editing their configuration through the adminspace. +> [!WARNING] +> Since `v0.6`, `zenohd` no longer loads every available plugin at startup. Instead, only configured plugins are loaded (after processing `--cfg` and `--plugin` options). Once `zenohd` is running, plugins can be hot-loaded and, if they support it, reconfigured at runtime by editing their configuration through the adminspace. Note that the REST plugin is added to the configuration by the default value of the `--rest-http-port` CLI argument. From 6799a92f427396c91a0eb437cc9bf1fdcf3ae872 Mon Sep 17 00:00:00 2001 From: Yuyuan Yuan Date: Wed, 3 Apr 2024 16:07:52 +0800 Subject: [PATCH 28/38] Fix issue #887 (#888) --- zenoh-ext/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zenoh-ext/Cargo.toml b/zenoh-ext/Cargo.toml index 6a0488cb54..b68cfe03a1 100644 --- a/zenoh-ext/Cargo.toml +++ b/zenoh-ext/Cargo.toml @@ -38,7 +38,7 @@ flume = { workspace = true } futures = { workspace = true } log = { workspace = true } serde = { workspace = true, features = ["default"] } -zenoh = { workspace = true, features = ["unstable"], default-features = false } +zenoh = { workspace = true, features = ["unstable"], default-features = true } zenoh-core = { workspace = true } zenoh-macros = { workspace = true } zenoh-result = { workspace = true } From 44900b745cca6e93c5f44158bf73a82071d2cbb3 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Thu, 4 Apr 2024 09:40:57 +0000 Subject: [PATCH 29/38] fix ZRuntimePool drop crash if not runtimes were initialized --- commons/zenoh-runtime/src/lib.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/commons/zenoh-runtime/src/lib.rs b/commons/zenoh-runtime/src/lib.rs index 6b62fdf7b7..ec8c622442 100644 --- a/commons/zenoh-runtime/src/lib.rs +++ b/commons/zenoh-runtime/src/lib.rs @@ -154,17 +154,19 @@ impl Drop for ZRuntimePool { let handles: Vec<_> = self .0 .drain() - .map(|(name, mut rt)| { - std::thread::spawn(move || { - rt.take() - .unwrap_or_else(|| panic!("ZRuntime {name:?} failed to shutdown.")) - .shutdown_timeout(Duration::from_secs(1)) - }) + .map(|(_name, mut rt)| { + rt.take().map(|r| + std::thread::spawn(move || { + r.shutdown_timeout(Duration::from_secs(1)) + }) + ) }) .collect(); - for hd in handles { - let _ = hd.join(); + for hdo in handles { + if let Some(hd) = hdo { + let _ = hd.join(); + } } } } From f419da8e68162a805b815e181c0f2f6482782e32 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Thu, 4 Apr 2024 10:06:22 +0000 Subject: [PATCH 30/38] clippy and fmt --- commons/zenoh-runtime/src/lib.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/commons/zenoh-runtime/src/lib.rs b/commons/zenoh-runtime/src/lib.rs index ec8c622442..492e0a6665 100644 --- a/commons/zenoh-runtime/src/lib.rs +++ b/commons/zenoh-runtime/src/lib.rs @@ -154,19 +154,14 @@ impl Drop for ZRuntimePool { let handles: Vec<_> = self .0 .drain() - .map(|(_name, mut rt)| { - rt.take().map(|r| - std::thread::spawn(move || { - r.shutdown_timeout(Duration::from_secs(1)) - }) - ) + .filter_map(|(_name, mut rt)| { + rt.take() + .map(|r| std::thread::spawn(move || r.shutdown_timeout(Duration::from_secs(1)))) }) .collect(); - for hdo in handles { - if let Some(hd) = hdo { - let _ = hd.join(); - } + for hd in handles { + let _ = hd.join(); } } } From d818d60efc07027670cd5cfb4ea598d14b0ae272 Mon Sep 17 00:00:00 2001 From: DenisBiryukov91 <155981813+DenisBiryukov91@users.noreply.github.com> Date: Thu, 4 Apr 2024 17:52:55 +0200 Subject: [PATCH 31/38] Proposal: Cancel query timeout task, once query is finalized (#881) * Cancel query timeout task, once query is finalized * Add missing cargo.lock --- zenoh/src/net/routing/dispatcher/face.rs | 3 +- zenoh/src/net/routing/dispatcher/queries.rs | 74 ++++++++++++--------- 2 files changed, 43 insertions(+), 34 deletions(-) diff --git a/zenoh/src/net/routing/dispatcher/face.rs b/zenoh/src/net/routing/dispatcher/face.rs index 3d7d850e6b..765779ee40 100644 --- a/zenoh/src/net/routing/dispatcher/face.rs +++ b/zenoh/src/net/routing/dispatcher/face.rs @@ -21,6 +21,7 @@ use std::any::Any; use std::collections::HashMap; use std::fmt; use std::sync::{Arc, Weak}; +use tokio_util::sync::CancellationToken; use zenoh_protocol::zenoh::RequestBody; use zenoh_protocol::{ core::{ExprId, WhatAmI, ZenohId}, @@ -42,7 +43,7 @@ pub struct FaceState { pub(crate) local_mappings: HashMap>, pub(crate) remote_mappings: HashMap>, pub(crate) next_qid: RequestId, - pub(crate) pending_queries: HashMap>, + pub(crate) pending_queries: HashMap, CancellationToken)>, pub(crate) mcast_group: Option, pub(crate) in_interceptors: Option>, pub(crate) hat: Box, diff --git a/zenoh/src/net/routing/dispatcher/queries.rs b/zenoh/src/net/routing/dispatcher/queries.rs index 01c681e0d6..570377acd1 100644 --- a/zenoh/src/net/routing/dispatcher/queries.rs +++ b/zenoh/src/net/routing/dispatcher/queries.rs @@ -20,6 +20,8 @@ use crate::net::routing::RoutingContext; use async_trait::async_trait; use std::collections::HashMap; use std::sync::{Arc, Weak}; +use std::time::Duration; +use tokio_util::sync::CancellationToken; use zenoh_config::WhatAmI; use zenoh_protocol::core::key_expr::keyexpr; use zenoh_protocol::network::declare::queryable::ext::QueryableInfo; @@ -236,7 +238,10 @@ fn insert_pending_query(outface: &mut Arc, query: Arc) -> Requ let outface_mut = get_mut_unchecked(outface); outface_mut.next_qid += 1; let qid = outface_mut.next_qid; - outface_mut.pending_queries.insert(qid, query); + outface_mut.pending_queries.insert( + qid, + (query, outface_mut.task_controller.get_cancellation_token()), + ); qid } @@ -362,6 +367,31 @@ struct QueryCleanup { qid: RequestId, } +impl QueryCleanup { + pub fn spawn_query_clean_up_task( + face: &Arc, + tables_ref: &Arc, + qid: u32, + timeout: Duration, + ) { + let mut cleanup = QueryCleanup { + tables: tables_ref.clone(), + face: Arc::downgrade(face), + qid, + }; + if let Some((_, cancellation_token)) = face.pending_queries.get(&qid) { + let c_cancellation_token = cancellation_token.clone(); + face.task_controller + .spawn_with_rt(zenoh_runtime::ZRuntime::Net, async move { + tokio::select! { + _ = tokio::time::sleep(timeout) => { cleanup.run().await } + _ = c_cancellation_token.cancelled() => {} + } + }); + } + } +} + #[async_trait] impl Timed for QueryCleanup { async fn run(&mut self) { @@ -374,7 +404,7 @@ impl Timed for QueryCleanup { drop(tables_lock); log::warn!( "Didn't receive final reply {}:{} from {}: Timeout!", - query.src_face, + query.0.src_face, self.qid, face ); @@ -594,20 +624,8 @@ pub fn route_query( #[cfg(feature = "complete_n")] { for ((outface, key_expr, context), qid, t) in route.values() { - let mut cleanup = QueryCleanup { - tables: tables_ref.clone(), - face: Arc::downgrade(outface), - qid: *qid, - }; - let cancellation_token = face.task_controller.get_cancellation_token(); - face.task_controller.spawn_with_rt( - zenoh_runtime::ZRuntime::Net, - async move { - tokio::select! { - _ = tokio::time::sleep(timeout) => { cleanup.run().await } - _ = cancellation_token.cancelled() => {} - } - }, + QueryCleanup::spawn_query_clean_up_task( + outface, tables_ref, *qid, timeout, ); #[cfg(feature = "stats")] if !admin { @@ -637,20 +655,8 @@ pub fn route_query( #[cfg(not(feature = "complete_n"))] { for ((outface, key_expr, context), qid) in route.values() { - let mut cleanup = QueryCleanup { - tables: tables_ref.clone(), - face: Arc::downgrade(outface), - qid: *qid, - }; - let cancellation_token = face.task_controller.get_cancellation_token(); - face.task_controller.spawn_with_rt( - zenoh_runtime::ZRuntime::Net, - async move { - tokio::select! { - _ = tokio::time::sleep(timeout) => { cleanup.run().await } - _ = cancellation_token.cancelled() => {} - } - }, + QueryCleanup::spawn_query_clean_up_task( + outface, tables_ref, *qid, timeout, ); #[cfg(feature = "stats")] if !admin { @@ -731,7 +737,7 @@ pub(crate) fn route_send_response( } match face.pending_queries.get(&qid) { - Some(query) => { + Some((query, _)) => { drop(queries_lock); #[cfg(feature = "stats")] @@ -777,7 +783,7 @@ pub(crate) fn route_send_response_final( drop(queries_lock); log::debug!( "Received final reply {}:{} from {}", - query.src_face, + query.0.src_face, qid, face ); @@ -800,7 +806,9 @@ pub(crate) fn finalize_pending_queries(tables_ref: &TablesLock, face: &mut Arc) { +pub(crate) fn finalize_pending_query(query: (Arc, CancellationToken)) { + let (query, cancellation_token) = query; + cancellation_token.cancel(); if let Some(query) = Arc::into_inner(query) { log::debug!("Propagate final reply {}:{}", query.src_face, query.src_qid); query From 9485061e42dcba2b20f991a0a796a636ad78c828 Mon Sep 17 00:00:00 2001 From: Luca Cominardi Date: Fri, 5 Apr 2024 09:49:41 +0200 Subject: [PATCH 32/38] Revert "Fix issue #887 (#888)" (#905) This reverts commit 6799a92f427396c91a0eb437cc9bf1fdcf3ae872. --- zenoh-ext/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zenoh-ext/Cargo.toml b/zenoh-ext/Cargo.toml index b68cfe03a1..6a0488cb54 100644 --- a/zenoh-ext/Cargo.toml +++ b/zenoh-ext/Cargo.toml @@ -38,7 +38,7 @@ flume = { workspace = true } futures = { workspace = true } log = { workspace = true } serde = { workspace = true, features = ["default"] } -zenoh = { workspace = true, features = ["unstable"], default-features = true } +zenoh = { workspace = true, features = ["unstable"], default-features = false } zenoh-core = { workspace = true } zenoh-macros = { workspace = true } zenoh-result = { workspace = true } From 20628838bc3968d1fa1d5100e1f31068438a5810 Mon Sep 17 00:00:00 2001 From: Mahmoud Mazouz Date: Fri, 5 Apr 2024 12:59:24 +0200 Subject: [PATCH 33/38] fix: Release workflow (#904) * fix: Release workflow * fix: Remove enforce-linking-issues workflow --- .github/workflows/enforce-linking-issues.yml | 60 -------------------- .github/workflows/pre-release.yml | 15 ++--- .github/workflows/release.yml | 47 ++++++++++----- 3 files changed, 37 insertions(+), 85 deletions(-) delete mode 100644 .github/workflows/enforce-linking-issues.yml diff --git a/.github/workflows/enforce-linking-issues.yml b/.github/workflows/enforce-linking-issues.yml deleted file mode 100644 index 37bb2cf63e..0000000000 --- a/.github/workflows/enforce-linking-issues.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: Enforce linking issues - -on: - pull_request_target: - types: [opened, edited, labeled] - workflow_call: - -defaults: - run: - shell: bash - -jobs: - main: - name: Enforce referencing a closing issue - runs-on: ubuntu-latest - steps: - - name: Count closing issue references - id: has-closing-issue - uses: actions/github-script@v7 - with: - result-encoding: string - script: | - const query = `query ($owner: String!, $name: String!, $number: Int!) { - repository(owner: $owner, name: $name) { - pullRequest(number: $number) { - closingIssuesReferences(first: 100) { - totalCount - } - } - } - }`; - - const reply = await github.graphql(query, { - owner: context.repo.owner, - name: context.repo.repo, - number: context.payload.pull_request.number - }); - - return reply - .repository - .pullRequest - .closingIssuesReferences - .totalCount > 0; - - - if: ${{ steps.has-closing-issue.outputs.result != 'true' }} - name: Suggest that the contributor link an issue - uses: actions/github-script@v7 - with: - github-token: ${{ secrets.BOT_TOKEN_WORKFLOW }} - script: | - const login = "${{ github.event.pull_request.user.login }}"; - const syntaxUrl = "https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue"; - const message = `@${login} If this pull request contains a bugfix or a new feature, then please consider using \`Closes #ISSUE-NUMBER\` [syntax](${syntaxUrl}) to link it to an issue.` - - github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: context.payload.pull_request.number, - body: message, - }); diff --git a/.github/workflows/pre-release.yml b/.github/workflows/pre-release.yml index 631302d722..2a1f760e26 100644 --- a/.github/workflows/pre-release.yml +++ b/.github/workflows/pre-release.yml @@ -35,10 +35,8 @@ jobs: - name: Clone this repository uses: actions/checkout@v4 - - name: Install Rust toolchain - run: | - rustup show - rustup component add rustfmt clippy + - name: Install rustup components + run: rustup component add rustfmt clippy - name: Code format check run: cargo fmt --check @@ -62,11 +60,8 @@ jobs: - name: Clone this repository uses: actions/checkout@v4 - - name: Install Rust toolchain - run: rustup show - - name: Install nextest - run: cargo install --locked cargo-nextest + run: cargo +stable install --locked cargo-nextest env: CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse @@ -83,7 +78,7 @@ jobs: ASYNC_STD_THREAD_COUNT: 4 doc: - name: Doc generation + name: Generate documentation needs: checks runs-on: ubuntu-latest steps: @@ -94,7 +89,7 @@ jobs: - name: Install Rust toolchain nightly for docs gen run: rustup toolchain install nightly - - name: generate doc + - name: Run rustdoc using Nightly Rust and Zenoh unstable # NOTE: force 'unstable' feature for doc generation, as forced for docs.rs build in zenoh/Cargo.toml run: > cargo +nightly rustdoc --manifest-path ./zenoh/Cargo.toml --lib --features unstable -j3 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index fe050776ec..21770b262b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,24 +20,38 @@ on: inputs: live-run: type: boolean - description: If false (or undefined) the workflow runs in dry-run mode (i.e. with no side-effects) + description: Live-run required: false default: false version: type: string - description: Release number. If undefined, the workflow auto-generates a version using git-describe + description: Release number required: false jobs: tag: - name: Bump and tag crates - uses: eclipse-zenoh/ci/.github/workflows/tag-crates.yml@main - with: - repo: ${{ github.repository }} - live-run: ${{ inputs.live-run || false }} - version: ${{ inputs.version }} - inter-deps-pattern: zenoh.* - secrets: inherit + name: Branch, bump & tag crates + runs-on: ubuntu-latest + outputs: + version: ${{ steps.create-release-branch.outputs.version }} + branch: ${{ steps.create-release-branch.outputs.branch }} + steps: + - id: create-release-branch + uses: eclipse-zenoh/ci/create-release-branch@main + with: + repo: ${{ github.repository }} + live-run: ${{ inputs.live-run || false }} + version: ${{ inputs.version }} + github-token: ${{ secrets.BOT_TOKEN_WORKFLOW }} + + - uses: eclipse-zenoh/ci/bump-crates@main + with: + repo: ${{ github.repository }} + version: ${{ steps.create-release-branch.outputs.version }} + branch: ${{ steps.create-release-branch.outputs.branch }} + bump-deps-pattern: zenoh.* + bump-deps-version: ${{ steps.create-release-branch.outputs.version }} + github-token: ${{ secrets.BOT_TOKEN_WORKFLOW }} build-debian: name: Build Debian packages @@ -64,14 +78,13 @@ jobs: secrets: inherit cargo: - name: Publish Cargo crates needs: tag + name: Publish Cargo crates uses: eclipse-zenoh/ci/.github/workflows/release-crates-cargo.yml@main with: - repos: ${{ github.repository }} + repo: ${{ github.repository }} live-run: ${{ inputs.live-run || false }} branch: ${{ needs.tag.outputs.branch }} - inter-deps-pattern: zenoh.* secrets: inherit debian: @@ -146,9 +159,10 @@ jobs: uses: eclipse-zenoh/ci/.github/workflows/release-crates-dockerhub.yml@main with: no-build: true - live-run: ${{ inputs.live-run || false }} + live-run: true version: ${{ needs.tag.outputs.version }} repo: ${{ github.repository }} + branch: ${{ needs.tag.outputs.branch }} tags: "eclipse/zenoh:${{ needs.tag.outputs.version }}" binary: zenohd files: | @@ -158,6 +172,7 @@ jobs: platforms: | linux/arm64 linux/amd64 + licenses: EPL-2.0 OR Apache-2.0 secrets: inherit ghcr: @@ -166,9 +181,10 @@ jobs: uses: eclipse-zenoh/ci/.github/workflows/release-crates-ghcr.yml@main with: no-build: true - live-run: ${{ inputs.live-run || false }} + live-run: true version: ${{ needs.tag.outputs.version }} repo: ${{ github.repository }} + branch: ${{ needs.tag.outputs.branch }} tags: "${{ github.repository }}:${{ needs.tag.outputs.version }}" binary: zenohd files: | @@ -178,4 +194,5 @@ jobs: platforms: | linux/arm64 linux/amd64 + licenses: EPL-2.0 OR Apache-2.0 secrets: inherit From b2ef3ddcbd13779553b038b10576e9a761fd015d Mon Sep 17 00:00:00 2001 From: Mahmoud Mazouz Date: Fri, 5 Apr 2024 16:40:23 +0200 Subject: [PATCH 34/38] fix: Release workflow (#909) * fix: Add missing ghcr prefix in release workflow * fix: Set unpublished-deps-patterns to match all zenoh crates --- .github/workflows/release.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 21770b262b..f5ff3fd2f9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -85,6 +85,7 @@ jobs: repo: ${{ github.repository }} live-run: ${{ inputs.live-run || false }} branch: ${{ needs.tag.outputs.branch }} + unpublished-deps-patterns: "zenoh.*" secrets: inherit debian: @@ -185,7 +186,7 @@ jobs: version: ${{ needs.tag.outputs.version }} repo: ${{ github.repository }} branch: ${{ needs.tag.outputs.branch }} - tags: "${{ github.repository }}:${{ needs.tag.outputs.version }}" + tags: "ghcr.io/${{ github.repository }}:${{ needs.tag.outputs.version }}" binary: zenohd files: | zenohd From 0926dd36bf81cb56729af63ea39c2c4389bfbad4 Mon Sep 17 00:00:00 2001 From: Gabriele Baldoni Date: Mon, 8 Apr 2024 16:29:49 +0000 Subject: [PATCH 35/38] fix(913): using 2+batch.len() when increasing rx_bytes (#914) * fix(913): using 2+batch.len() when increasing rx_bytes Signed-off-by: gabrik * chore: format Signed-off-by: gabrik --------- Signed-off-by: gabrik --- io/zenoh-transport/src/unicast/universal/link.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/io/zenoh-transport/src/unicast/universal/link.rs b/io/zenoh-transport/src/unicast/universal/link.rs index 6e86a59aa7..fe4e8c8691 100644 --- a/io/zenoh-transport/src/unicast/universal/link.rs +++ b/io/zenoh-transport/src/unicast/universal/link.rs @@ -260,7 +260,8 @@ async fn rx_task( let batch = batch.map_err(|_| zerror!("{}: expired after {} milliseconds", link, lease.as_millis()))??; #[cfg(feature = "stats")] { - transport.stats.inc_rx_bytes(2 + n); // Account for the batch len encoding (16 bits) + + transport.stats.inc_rx_bytes(2 + batch.len()); // Account for the batch len encoding (16 bits) } transport.read_messages(batch, &l)?; } From fff238c9fb44210f7b2d338343d1e1986f5e9e44 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Tue, 9 Apr 2024 15:37:32 +0200 Subject: [PATCH 36/38] make session only close its runtime on drop, if it was created from config --- zenoh/src/session.rs | 11 +++++++--- zenoh/tests/session.rs | 46 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/zenoh/src/session.rs b/zenoh/src/session.rs index e68c3ed955..88f3c2cb77 100644 --- a/zenoh/src/session.rs +++ b/zenoh/src/session.rs @@ -393,6 +393,7 @@ pub struct Session { pub(crate) state: Arc>, pub(crate) id: u16, pub(crate) alive: bool, + owns_runtime: bool, task_controller: TaskController, } @@ -414,6 +415,7 @@ impl Session { state: state.clone(), id: SESSION_ID_COUNTER.fetch_add(1, Ordering::SeqCst), alive: true, + owns_runtime: false, task_controller: TaskController::default(), }; @@ -522,8 +524,9 @@ impl Session { ResolveFuture::new(async move { trace!("close()"); self.task_controller.terminate_all(Duration::from_secs(10)); - self.runtime.close().await?; - + if self.owns_runtime { + self.runtime.close().await?; + } let mut state = zwrite!(self.state); state.primitives.as_ref().unwrap().send_close(); // clean up to break cyclic references from self.state to itself @@ -810,6 +813,7 @@ impl Session { state: self.state.clone(), id: self.id, alive: false, + owns_runtime: self.owns_runtime, task_controller: self.task_controller.clone(), } } @@ -822,13 +826,14 @@ impl Session { let aggregated_publishers = config.aggregation().publishers().clone(); match Runtime::init(config).await { Ok(mut runtime) => { - let session = Self::init( + let mut session = Self::init( runtime.clone(), aggregated_subscribers, aggregated_publishers, ) .res_async() .await; + session.owns_runtime = true; match runtime.start().await { Ok(()) => { // Workaround for the declare_and_shoot problem diff --git a/zenoh/tests/session.rs b/zenoh/tests/session.rs index b5f897be4c..a284cb9580 100644 --- a/zenoh/tests/session.rs +++ b/zenoh/tests/session.rs @@ -15,6 +15,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use std::time::Duration; use zenoh::prelude::r#async::*; +use zenoh::runtime::Runtime; use zenoh_core::ztimeout; const TIMEOUT: Duration = Duration::from_secs(60); @@ -64,7 +65,7 @@ async fn open_session_multicast(endpoint01: &str, endpoint02: &str) -> (Session, } async fn close_session(peer01: Session, peer02: Session) { - println!("[ ][01d] Closing peer02 session"); + println!("[ ][01d] Closing peer01 session"); ztimeout!(peer01.close().res_async()).unwrap(); println!("[ ][02d] Closing peer02 session"); ztimeout!(peer02.close().res_async()).unwrap(); @@ -198,3 +199,46 @@ async fn zenoh_session_multicast() { test_session_pubsub(&peer01, &peer02, Reliability::BestEffort).await; close_session(peer01, peer02).await; } + + +async fn open_session_unicast_runtime(endpoints: &[&str]) -> (Runtime, Runtime) { + // Open the sessions + let mut config = config::peer(); + config.listen.endpoints = endpoints + .iter() + .map(|e| e.parse().unwrap()) + .collect::>(); + config.scouting.multicast.set_enabled(Some(false)).unwrap(); + println!("[ ][01a] Creating r1 session runtime: {:?}", endpoints); + let r1 = Runtime::new(config).await.unwrap(); + + let mut config = config::peer(); + config.connect.endpoints = endpoints + .iter() + .map(|e| e.parse().unwrap()) + .collect::>(); + config.scouting.multicast.set_enabled(Some(false)).unwrap(); + println!("[ ][02a] Creating r2 session runtime: {:?}", endpoints); + let r2 = Runtime::new(config).await.unwrap(); + + (r1, r2) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn zenoh_2sessions_1runtime_init() { + let (r1, r2) = open_session_unicast_runtime(&["tcp/127.0.1:17449"]).await; + println!("[RI][02a] Creating peer01 session from runtime 1"); + let peer01 = zenoh::init(r1.clone()).res_async().await.unwrap(); + println!("[RI][02b] Creating peer02 session from runtime 2"); + let peer02 = zenoh::init(r2.clone()).res_async().await.unwrap(); + println!("[RI][02c] Creating peer01a session from runtime 1"); + let peer01a = zenoh::init(r1.clone()).res_async().await.unwrap(); + println!("[RI][03c] Closing peer01a session"); + std::mem::drop(peer01a); + test_session_pubsub(&peer01, &peer02, Reliability::Reliable).await; + close_session(peer01, peer02).await; + println!("[ ][01e] Closing r1 runtime"); + ztimeout!(r1.close()).unwrap(); + println!("[ ][02e] Closing r2 runtime"); + ztimeout!(r2.close()).unwrap(); +} \ No newline at end of file From c50b680ff2f5df61956ca001526617dc9de9336e Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Tue, 9 Apr 2024 15:44:09 +0200 Subject: [PATCH 37/38] fmt --- zenoh/tests/session.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/zenoh/tests/session.rs b/zenoh/tests/session.rs index a284cb9580..d23935bfb4 100644 --- a/zenoh/tests/session.rs +++ b/zenoh/tests/session.rs @@ -200,7 +200,6 @@ async fn zenoh_session_multicast() { close_session(peer01, peer02).await; } - async fn open_session_unicast_runtime(endpoints: &[&str]) -> (Runtime, Runtime) { // Open the sessions let mut config = config::peer(); @@ -241,4 +240,4 @@ async fn zenoh_2sessions_1runtime_init() { ztimeout!(r1.close()).unwrap(); println!("[ ][02e] Closing r2 runtime"); ztimeout!(r2.close()).unwrap(); -} \ No newline at end of file +} From 70d7a4cfff6b405702755aa904f95f3e65b79c69 Mon Sep 17 00:00:00 2001 From: Denis Biryukov Date: Tue, 9 Apr 2024 18:38:29 +0200 Subject: [PATCH 38/38] fix address" --- zenoh/tests/session.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zenoh/tests/session.rs b/zenoh/tests/session.rs index d23935bfb4..7e50f7a6bb 100644 --- a/zenoh/tests/session.rs +++ b/zenoh/tests/session.rs @@ -225,7 +225,7 @@ async fn open_session_unicast_runtime(endpoints: &[&str]) -> (Runtime, Runtime) #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn zenoh_2sessions_1runtime_init() { - let (r1, r2) = open_session_unicast_runtime(&["tcp/127.0.1:17449"]).await; + let (r1, r2) = open_session_unicast_runtime(&["tcp/127.0.0.1:17449"]).await; println!("[RI][02a] Creating peer01 session from runtime 1"); let peer01 = zenoh::init(r1.clone()).res_async().await.unwrap(); println!("[RI][02b] Creating peer02 session from runtime 2");