From 68d8b0c92361c6faad6a79630a39f51b339ba8b9 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Mon, 17 Apr 2023 12:31:12 +0200 Subject: [PATCH 01/31] version crossbeam at the workspace level --- Cargo.toml | 1 + crates/re_analytics/Cargo.toml | 2 +- crates/re_renderer/Cargo.toml | 2 +- crates/re_sdk_comms/Cargo.toml | 2 +- crates/re_smart_channel/Cargo.toml | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b5c35e7a5280..1e45230f6bdd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,6 +57,7 @@ arrow2_convert = "0.4.2" cfg-if = "1.0" clap = "4.0" comfy-table = { version = "6.1", default-features = false } +crossbeam = "0.8" ctrlc = { version = "3.0", features = ["termination"] } ecolor = "0.21.0" eframe = { version = "0.21.3", default-features = false } diff --git a/crates/re_analytics/Cargo.toml b/crates/re_analytics/Cargo.toml index b1c3477d4ca2..dc8e8b440150 100644 --- a/crates/re_analytics/Cargo.toml +++ b/crates/re_analytics/Cargo.toml @@ -23,7 +23,7 @@ re_log.workspace = true # External dependencies: anyhow.workspace = true -crossbeam = "0.8" +crossbeam.workspace = true once_cell = "1.17" serde = { version = "1", features = ["derive"] } serde_json = "1" diff --git a/crates/re_renderer/Cargo.toml b/crates/re_renderer/Cargo.toml index c5a2fb1df2b7..eaf73892e1ee 100644 --- a/crates/re_renderer/Cargo.toml +++ b/crates/re_renderer/Cargo.toml @@ -75,7 +75,7 @@ tobj = { version = "3.2", optional = true } # native [target.'cfg(not(target_arch = "wasm32"))'.dependencies] -crossbeam = "0.8" +crossbeam.workspace = true notify = "5.0" puffin.workspace = true wgpu-core.workspace = true diff --git a/crates/re_sdk_comms/Cargo.toml b/crates/re_sdk_comms/Cargo.toml index 9aa9cce5d4cc..485b13488d72 100644 --- a/crates/re_sdk_comms/Cargo.toml +++ b/crates/re_sdk_comms/Cargo.toml @@ -32,7 +32,7 @@ re_smart_channel.workspace = true ahash.workspace = true anyhow.workspace = true bincode = "1.3" -crossbeam = "0.8" +crossbeam.workspace = true document-features = "0.2" rand = { version = "0.8.5", features = ["small_rng"] } tokio.workspace = true diff --git a/crates/re_smart_channel/Cargo.toml b/crates/re_smart_channel/Cargo.toml index c8f79f5e839f..58cf36de8b58 100644 --- a/crates/re_smart_channel/Cargo.toml +++ b/crates/re_smart_channel/Cargo.toml @@ -17,5 +17,5 @@ all-features = true [dependencies] -crossbeam = "0.8" +crossbeam.workspace = true instant = { version = "0.1", features = ["wasm-bindgen"] } From cb7403814a04fb577d891d68ae8519e942cf93ab Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 26 Apr 2023 16:23:19 +0200 Subject: [PATCH 02/31] more DataRow size helpers --- crates/re_log_types/src/data_row.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/crates/re_log_types/src/data_row.rs b/crates/re_log_types/src/data_row.rs index 6cee609fa8fb..1850c8b913c8 100644 --- a/crates/re_log_types/src/data_row.rs +++ b/crates/re_log_types/src/data_row.rs @@ -87,6 +87,13 @@ impl std::ops::IndexMut for DataCellRow { } } +impl SizeBytes for DataCellRow { + #[inline] + fn heap_size_bytes(&self) -> u64 { + self.0.heap_size_bytes() + } +} + // --- /// A unique ID for a [`DataRow`]. 
@@ -325,6 +332,24 @@ impl DataRow { } } +impl SizeBytes for DataRow { + fn heap_size_bytes(&self) -> u64 { + let Self { + row_id, + timepoint, + entity_path, + num_instances, + cells, + } = self; + + row_id.heap_size_bytes() + + timepoint.heap_size_bytes() + + entity_path.heap_size_bytes() + + num_instances.heap_size_bytes() + + cells.heap_size_bytes() + } +} + impl DataRow { #[inline] pub fn row_id(&self) -> RowId { From a0d9d39f318dbf56a62eaf1445a5f7c8522f1c82 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 26 Apr 2023 18:30:46 +0200 Subject: [PATCH 03/31] DataTableBatcher --- Cargo.lock | 33 + crates/re_log_types/Cargo.toml | 2 + crates/re_log_types/src/data_table_batcher.rs | 836 ++++++++++++++++++ crates/re_log_types/src/lib.rs | 32 +- 4 files changed, 890 insertions(+), 13 deletions(-) create mode 100644 crates/re_log_types/src/data_table_batcher.rs diff --git a/Cargo.lock b/Cargo.lock index ed0120052432..dabac16c4dc7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -507,6 +507,17 @@ dependencies = [ "objc2-encode", ] +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", +] + [[package]] name = "bumpalo" version = "3.11.1" @@ -3923,6 +3934,7 @@ dependencies = [ "arrow2", "arrow2_convert", "bytemuck", + "crossbeam", "document-features", "ecolor", "fixed", @@ -3945,6 +3957,7 @@ dependencies = [ "rmp-serde", "serde", "serde_bytes", + "similar-asserts", "smallvec", "thiserror", "time 0.3.20", @@ -4278,6 +4291,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + [[package]] name = "regex-syntax" version = "0.6.28" @@ -4722,6 +4741,20 @@ name = "similar" version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "420acb44afdae038210c99e69aae24109f32f15500aa708e81d46c9f29d55fcf" +dependencies = [ + "bstr", + "unicode-segmentation", +] + +[[package]] +name = "similar-asserts" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbf644ad016b75129f01a34a355dcb8d66a5bc803e417c7a77cc5d5ee9fa0f18" +dependencies = [ + "console", + "similar", +] [[package]] name = "slab" diff --git a/crates/re_log_types/Cargo.toml b/crates/re_log_types/Cargo.toml index 3af826321b61..ccfd8988a164 100644 --- a/crates/re_log_types/Cargo.toml +++ b/crates/re_log_types/Cargo.toml @@ -94,8 +94,10 @@ serde_bytes = { version = "0.11", optional = true } # Native dependencies: [target.'cfg(not(target_arch = "wasm32"))'.dependencies] +crossbeam.workspace = true puffin.workspace = true [dev-dependencies] rmp-serde = "1.1" +similar-asserts = "1.4.2" diff --git a/crates/re_log_types/src/data_table_batcher.rs b/crates/re_log_types/src/data_table_batcher.rs new file mode 100644 index 000000000000..e0c085927fd0 --- /dev/null +++ b/crates/re_log_types/src/data_table_batcher.rs @@ -0,0 +1,836 @@ +// TODO: debouncing is probably another PR? + +// TODO: logs + +// TODO: why at the table level rather than the chunk level you ask? + +// TODO: we're still inserting row by row on the other end though + +// TODO: we have to be quite specific about per-process/per-thread/per-recording? 
+ +// TODO: strong guarantees for the thread that flushes/shutdowns/drops, weak guarantees for the +// other ones + +use std::{ + sync::Arc, + time::{Duration, Instant}, +}; + +use crossbeam::channel::{Receiver, SendError, Sender}; + +use crate::{DataRow, DataTable, SizeBytes, TableId}; + +// --- + +/// Errors that can occur when creating/manipulating a [`DataTableBatcher`]. +#[derive(thiserror::Error, Debug)] +pub enum DataTableBatcherError { + /// Error when parsing configuration from environment. + #[error("Failed to parse config: '{name}={value}': {err}")] + ParseConfig { + name: &'static str, + value: String, + err: Box, + }, + + /// Error spawning one of the background threads. + #[error("Failed to spawn background thread '{name}': {err}")] + SpawnThread { + name: &'static str, + err: Box, + }, +} + +pub type DataTableBatcherResult = Result; + +// --- + +/// Defines the the different thresholds of the associated [`DataTableBatcher`]. +/// +/// See [`Self::default`] and [`Self::from_env`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct DataTableBatcherConfig { + /// Duration of the periodic tick. + // + // NOTE: We use `std::time` directly because this library has to deal with `crossbeam` as well + // as std threads, which both expect standard types anyway. + // + // TODO(cmc): Add support for burst debouncing. + pub flush_tick: Duration, + + /// Flush if the accumulated payload has a size in bytes equal or greater than this. + pub flush_num_bytes: u64, + + /// Flush if the accumulated payload has a number of rows equal or greater than this. + pub flush_num_rows: u64, + + /// Size of the internal channel of [`DataTableBatcherCmd`]s. + /// + /// Unbounded if left unspecified. + pub max_commands_in_flight: Option, + + /// Size of the internal channel of [`DataTable`]s. + /// + /// Unbounded if left unspecified. + pub max_tables_in_flight: Option, +} + +impl Default for DataTableBatcherConfig { + fn default() -> Self { + Self::DEFAULT + } +} + +impl DataTableBatcherConfig { + /// Default configuration, applicable to most use cases. + pub const DEFAULT: Self = Self { + flush_tick: Duration::from_millis(50), + flush_num_bytes: 1024 * 1024, // 1 MiB + flush_num_rows: u64::MAX, + max_commands_in_flight: None, + max_tables_in_flight: None, + }; + + /// Always flushes ASAP. + pub const ALWAYS: Self = Self { + flush_tick: Duration::MAX, + flush_num_bytes: 0, + flush_num_rows: 0, + max_commands_in_flight: None, + max_tables_in_flight: None, + }; + + /// Never flushes unless manually told to. + pub const NEVER: Self = Self { + flush_tick: Duration::MAX, + flush_num_bytes: u64::MAX, + flush_num_rows: u64::MAX, + max_commands_in_flight: None, + max_tables_in_flight: None, + }; + + /// Environment variable to configure [`Self::flush_duration`]. + pub const ENV_FLUSH_TICK: &str = "RERUN_FLUSH_TICK_SECS"; + + /// Environment variable to configure [`Self::flush_num_bytes`]. + pub const ENV_FLUSH_NUM_BYTES: &str = "RERUN_FLUSH_NUM_BYTES"; + + /// Environment variable to configure [`Self::flush_num_rows`]. + pub const ENV_FLUSH_NUM_ROWS: &str = "RERUN_FLUSH_NUM_ROWS"; + + /// Creates a new `DataTableBatcherConfig` using the default values, optionally overridden + /// through the environment. + /// + /// See [`Self::apply_env`]. + #[inline] + pub fn from_env() -> DataTableBatcherResult { + Self::default().apply_env() + } + + /// Returns a copy of `self`, overriding existing fields with values from the environment if + /// they are present. 
+ /// + /// See [`Self::ENV_FLUSH_DURATION`], [`Self::ENV_FLUSH_NUM_BYTES`], + /// [`Self::ENV_FLUSH_NUM_BYTES`]. + pub fn apply_env(&self) -> DataTableBatcherResult { + let mut new = self.clone(); + + if let Ok(s) = std::env::var(Self::ENV_FLUSH_TICK) { + let flush_duration_secs: f64 = + s.parse() + .map_err(|err| DataTableBatcherError::ParseConfig { + name: Self::ENV_FLUSH_TICK, + value: s.clone(), + err: Box::new(err), + })?; + + new.flush_tick = Duration::from_secs_f64(flush_duration_secs); + } + + if let Ok(s) = std::env::var(Self::ENV_FLUSH_NUM_BYTES) { + new.flush_num_bytes = s + .parse() + .map_err(|err| DataTableBatcherError::ParseConfig { + name: Self::ENV_FLUSH_NUM_BYTES, + value: s.clone(), + err: Box::new(err), + })?; + } + + if let Ok(s) = std::env::var(Self::ENV_FLUSH_NUM_ROWS) { + new.flush_num_rows = s + .parse() + .map_err(|err| DataTableBatcherError::ParseConfig { + name: Self::ENV_FLUSH_NUM_ROWS, + value: s.clone(), + err: Box::new(err), + })?; + } + + Ok(new) + } +} + +#[test] +fn data_table_batcher_config() { + // Detect breaking changes in our environment variables. + std::env::set_var("RERUN_FLUSH_TICK_SECS", "0.3"); + std::env::set_var("RERUN_FLUSH_NUM_BYTES", "42"); + std::env::set_var("RERUN_FLUSH_NUM_ROWS", "666"); + + let config = DataTableBatcherConfig::from_env().unwrap(); + + let expected = DataTableBatcherConfig { + flush_tick: Duration::from_millis(300), + flush_num_bytes: 42, + flush_num_rows: 666, + ..Default::default() + }; + + assert_eq!(expected, config); +} + +// --- + +/// Implements an asynchronous batcher that coalesces [`DataRow`]s into [`DataTable`]s based upon +/// the threshold defined in the associated [`DataTableBatcherConfig`]. +/// +/// ## Multithreading and ordering +/// +/// `DataTableBatcher` can be cheaply clone and used freely across any number of threads. +/// +/// Internally, all operations are linearized into a pipeline: +/// - All operations sent by a given thread will take effect in the same exact order as that +/// thread originally sent them in from its point of view. +/// - There is no defined global ordering across multiple threads. +/// +/// This means that e.g. flushing the pipeline ([`Self::flush_blocking`]) guarantees that all +/// previous data sent by the calling thread has been batched; no more, no less. +/// +/// ## Shutdown +/// +/// The batcher can be shutdown either by dropping all instances of it, or simply by calling +/// [`Self::shutdown`]. +/// +/// The pipeline will first atomically close input channels, then flush, then finally shutdown. +/// Threads other than the caller that might be in the process of sending data will be greeted with +/// a `SendError` and an opportunity to retrieve the data before it gets lost. +/// +/// Shutting down cannot ever block, unless the output channel is bounded and happens to be full +/// (see [`DataTableBatcherConfig::max_tables_in_flight`]). +#[derive(Clone)] +pub struct DataTableBatcher { + inner: Arc, +} + +// NOTE: The receiving end of the command stream as well as the sending end of the table stream are +// owned solely by the batching thread. 
+struct DataTableBatcherInner { + tx_cmds: Sender, + rx_tables: Receiver, + cmds_to_tables_handle: Option>, +} + +impl Drop for DataTableBatcherInner { + fn drop(&mut self) { + self.shutdown().ok(); // can only fail if already down + if let Some(handle) = self.cmds_to_tables_handle.take() { + handle.join().ok(); + } + } +} + +enum Command { + // TODO(cmc): support for appending full tables + AppendRow(DataRow), + Flush(Sender<()>), + Shutdown, +} + +impl Command { + fn flush() -> (Self, Receiver<()>) { + let (tx, rx) = crossbeam::channel::bounded(0); // oneshot + (Self::Flush(tx), rx) + } +} + +impl DataTableBatcher { + /// Creates a new `DataTableBatcher` using the passed in `config`. + /// + /// The returned object must be kept in scope: dropping it will trigger a clean shutdown of the + /// batcher. + #[must_use = "Batching threads will automatically shutdown when this object is dropped"] + #[allow(clippy::needless_pass_by_value)] + pub fn new(config: DataTableBatcherConfig) -> DataTableBatcherResult { + let (tx_cmds, rx_cmd) = match config.max_commands_in_flight { + Some(cap) => crossbeam::channel::bounded(cap as _), + None => crossbeam::channel::unbounded(), + }; + + let (tx_table, rx_tables) = match config.max_tables_in_flight { + Some(cap) => crossbeam::channel::bounded(cap as _), + None => crossbeam::channel::unbounded(), + }; + + let cmds_to_tables_handle = { + const NAME: &str = "DataTableBatcher::cmds_to_tables"; + std::thread::Builder::new() + .name(NAME.into()) + .spawn({ + let config = config.clone(); + move || batching_thread(config, rx_cmd, tx_table) + }) + .map_err(|err| DataTableBatcherError::SpawnThread { + name: NAME, + err: Box::new(err), + })? + }; + + re_log::debug!(?config, "creating new table batcher"); + + let inner = DataTableBatcherInner { + tx_cmds, + rx_tables, + cmds_to_tables_handle: Some(cmds_to_tables_handle), + }; + + Ok(Self { + inner: Arc::new(inner), + }) + } + + // --- Send commands --- + + /// Pushes a [`DataRow`] down the batching pipeline. + /// + /// This will call [`DataRow::compute_all_size_bytes`] from the batching thread! + /// + /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. + /// + /// ## Failure + /// + /// This can only fail if called on a `DataTableBatcher` that has already shutdown. + /// The data can be retrieved from the returned error value in such cases. + #[inline] + pub fn push_row(&self, row: DataRow) -> Result<(), SendError> { + self.inner.push_row(row) + } + + /// Initiates a flush of the pipeline and returns immediately. + /// + /// This does **not** wait for the flush to propagate (see [`Self::flush_blocking`]). + /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. + /// + /// ## Failure + /// + /// This can only fail if called on a `DataTableBatcher` that has already shutdown. + #[inline] + pub fn flush_async(&self) -> Result<(), SendError<()>> { + self.inner.flush_and_forget() + } + + /// Initiates a flush the batching pipeline and waits for it to propagate. + /// + /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. + /// + /// ## Failure + /// + /// This can only fail if called on a `DataTableBatcher` that has already shutdown. + #[inline] + pub fn flush_blocking(&self) -> Result<(), SendError<()>> { + self.inner.flush_blocking() + } + + /// Shutdowns the batcher. + /// + /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. 
+ /// + /// ## Failure + /// + /// This can only fail if called on a `DataTableBatcher` that has already shutdown. + #[inline] + pub fn shutdown(&self) -> Result<(), SendError<()>> { + self.inner.shutdown() + } + + // --- Subscribe to tables --- + + /// Returns a _shared_ channel in which are sent the batched [`DataTables`]. + /// + /// Shutting down the batcher will close this channel. + /// + /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. + pub fn tables(&self) -> Receiver { + self.inner.rx_tables.clone() + } +} + +impl DataTableBatcherInner { + fn push_row(&self, row: DataRow) -> Result<(), SendError> { + self.send_cmd(Command::AppendRow(row)) + .map_err(|err| match err.0 { + Command::AppendRow(row) => SendError(row), + _ => unreachable!(), + }) + } + + fn flush_and_forget(&self) -> Result<(), SendError<()>> { + let (flush_cmd, _) = Command::flush(); + self.send_cmd(flush_cmd).map_err(|_err| SendError(()))?; + Ok(()) + } + + fn flush_blocking(&self) -> Result<(), SendError<()>> { + let (flush_cmd, oneshot) = Command::flush(); + self.send_cmd(flush_cmd).map_err(|_err| SendError(()))?; + oneshot.recv().ok(); + Ok(()) + } + + fn shutdown(&self) -> Result<(), SendError<()>> { + self.flush_and_forget()?; + self.send_cmd(Command::Shutdown) + .map_err(|_err| SendError(()))?; + Ok(()) + } + + fn send_cmd(&self, cmd: Command) -> Result<(), SendError> { + self.tx_cmds.send(cmd) + } +} + +#[allow(clippy::needless_pass_by_value)] +fn batching_thread( + config: DataTableBatcherConfig, + rx_cmd: Receiver, + tx_table: Sender, +) { + let rx_tick = crossbeam::channel::tick(config.flush_tick); + + struct Accumulator { + latest: Instant, + pending_rows: Vec, + pending_num_rows: u64, + pending_num_bytes: u64, + } + impl Accumulator { + fn reset(&mut self) { + self.latest = Instant::now(); + self.pending_rows.clear(); + self.pending_num_rows = 0; + self.pending_num_bytes = 0; + } + } + + let mut acc = Accumulator { + latest: Instant::now(), + pending_rows: Default::default(), + pending_num_rows: Default::default(), + pending_num_bytes: Default::default(), + }; + + fn do_push_row( + config: &DataTableBatcherConfig, + acc: &mut Accumulator, + mut row: DataRow, + ) -> bool { + // TODO(cmc): now that we're re doing this here, it really is a massive waste not to send + // it over the wire... link issue + row.compute_all_size_bytes(); + + acc.pending_num_rows += 1; + acc.pending_num_bytes += row.total_size_bytes(); + acc.pending_rows.push(row); + + acc.pending_num_rows >= config.flush_num_rows + || acc.pending_num_bytes >= config.flush_num_bytes + } + + fn do_flush_all(acc: &mut Accumulator, tx_table: &Sender, reason: &str) { + let rows = &mut acc.pending_rows; + + if rows.is_empty() { + return; + } + + re_log::trace!(reason, "flushing tables"); + + let table = DataTable::from_rows(TableId::random(), rows.drain(..)); + // TODO(cmc): efficient table sorting here, following the same rules as the store's. + // table.sort(); + + // NOTE: This can only fail if all receivers have been dropped, which simply cannot happen + // as long the batching thread is alive... which is where we currently are. + tx_table.send(table).ok(); + } + + use crossbeam::select; + loop { + select! { + recv(rx_cmd) -> cmd => { + let Ok(cmd) = cmd else { + // All command senders are gone, which can only happen if the + // `DataTableBatcher` itself has been dropped. 
+ do_flush_all(&mut acc, &tx_table, "all_dropped"); + break; + }; + + match cmd { + Command::AppendRow(row) => { + if do_push_row(&config, &mut acc, row) { + do_flush_all(&mut acc, &tx_table, "bytes|rows"); + acc.reset(); + } + }, + Command::Flush(oneshot) => { + do_flush_all(&mut acc, &tx_table, "manual"); + acc.reset(); + drop(oneshot); // signals the oneshot + }, + Command::Shutdown => break, + }; + }, + recv(rx_tick) -> _ => { + do_flush_all(&mut acc, &tx_table, "duration"); + acc.reset(); + }, + }; + } + + drop(rx_cmd); + do_flush_all(&mut acc, &tx_table, "shutdown"); + drop(tx_table); + + // NOTE: The receiving end of the command stream as well as the sending end of the table + // stream are owned solely by this thread. + // Past this point, all command writes and all table reads will return `ErrDisconnected`. +} + +// --- + +#[cfg(test)] +mod tests { + use super::*; + + use crossbeam::{channel::TryRecvError, select}; + use itertools::Itertools as _; + + use crate::{DataRow, RowId, SizeBytes, TimePoint, Timeline}; + + #[test] + fn manual_trigger() { + let batcher = DataTableBatcher::new(DataTableBatcherConfig::NEVER).unwrap(); + let tables = batcher.tables(); + + let expected = create_table(); + + for _ in 0..3 { + assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); + + for row in expected.to_rows() { + batcher.push_row(row).unwrap(); + } + + assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); + + batcher.flush_blocking().unwrap(); + + { + let mut table = tables.recv().unwrap(); + // NOTE: Override the resulting table's ID so they can be compared. + table.table_id = expected.table_id; + + similar_asserts::assert_eq!(expected, table); + } + + assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); + } + + drop(batcher); + + assert_eq!(Err(TryRecvError::Disconnected), tables.try_recv()); + } + + #[test] + fn shutdown_trigger() { + let batcher = DataTableBatcher::new(DataTableBatcherConfig::NEVER).unwrap(); + let tables = batcher.tables(); + + let rows = create_table().to_rows().collect_vec(); + + for _ in 0..3 { + assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); + + for row in rows.clone() { + batcher.push_row(row).unwrap(); + } + + assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); + } + + drop(batcher); + + let expected = DataTable::from_rows( + TableId::ZERO, + std::iter::repeat_with(|| rows.clone()).take(3).flatten(), + ); + + select! { + recv(tables) -> batch => { + let mut table = batch.unwrap(); + // NOTE: Override the resulting table's ID so they can be compared. + table.table_id = expected.table_id; + + similar_asserts::assert_eq!(expected, table); + } + default(Duration::from_millis(50)) => { + panic!("output channel never yielded any table"); + } + } + + assert_eq!(Err(TryRecvError::Disconnected), tables.try_recv()); + } + + #[test] + fn num_bytes_trigger() { + let mut table = create_table(); + table.compute_all_size_bytes(); + + let rows = table.to_rows().collect_vec(); + let flush_duration = std::time::Duration::from_millis(50); + let flush_num_bytes = rows + .iter() + .take(rows.len() - 1) + .map(|row| row.total_size_bytes()) + .sum::(); + + let batcher = DataTableBatcher::new(DataTableBatcherConfig { + flush_num_bytes, + flush_tick: flush_duration, + ..DataTableBatcherConfig::NEVER + }) + .unwrap(); + let tables = batcher.tables(); + + assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); + + for row in table.to_rows() { + batcher.push_row(row).unwrap(); + } + + // Expect all rows except for the last one (num_bytes trigger). + select! 
{ + recv(tables) -> batch => { + let table = batch.unwrap(); + let expected = DataTable::from_rows( + table.table_id, + rows.clone().into_iter().take(rows.len() - 1), + ); + similar_asserts::assert_eq!(expected, table); + } + default(flush_duration) => { + panic!("output channel never yielded any table"); + } + } + + // Expect just the last row (duration trigger). + select! { + recv(tables) -> batch => { + let table = batch.unwrap(); + let expected = DataTable::from_rows( + table.table_id, + rows.last().cloned(), + ); + similar_asserts::assert_eq!(expected, table); + } + default(flush_duration * 2) => { + panic!("output channel never yielded any table"); + } + } + + assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); + + drop(batcher); + + assert_eq!(Err(TryRecvError::Disconnected), tables.try_recv()); + } + + #[test] + fn num_rows_trigger() { + let table = create_table(); + + let rows = table.to_rows().collect_vec(); + let flush_duration = std::time::Duration::from_millis(50); + let flush_num_rows = rows.len() as u64 - 1; + + let batcher = DataTableBatcher::new(DataTableBatcherConfig { + flush_num_rows, + flush_tick: flush_duration, + ..DataTableBatcherConfig::NEVER + }) + .unwrap(); + let tables = batcher.tables(); + + assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); + + for row in table.to_rows() { + batcher.push_row(row).unwrap(); + } + + // Expect all rows except for the last one. + select! { + recv(tables) -> batch => { + let table = batch.unwrap(); + let expected = DataTable::from_rows( + table.table_id, + rows.clone().into_iter().take(rows.len() - 1), + ); + similar_asserts::assert_eq!(expected, table); + } + default(flush_duration) => { + panic!("output channel never yielded any table"); + } + } + + // Expect just the last row. + select! { + recv(tables) -> batch => { + let table = batch.unwrap(); + let expected = DataTable::from_rows( + table.table_id, + rows.last().cloned(), + ); + similar_asserts::assert_eq!(expected, table); + } + default(flush_duration * 2) => { + panic!("output channel never yielded any table"); + } + } + + assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); + + drop(batcher); + + assert_eq!(Err(TryRecvError::Disconnected), tables.try_recv()); + } + + #[test] + fn duration_trigger() { + let table = create_table(); + let rows = table.to_rows().collect_vec(); + + let flush_duration = Duration::from_millis(50); + + let batcher = DataTableBatcher::new(DataTableBatcherConfig { + flush_tick: flush_duration, + ..DataTableBatcherConfig::NEVER + }) + .unwrap(); + let tables = batcher.tables(); + + assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); + + _ = std::thread::Builder::new().spawn({ + let mut rows = rows.clone(); + let batcher = batcher.clone(); + move || { + for row in rows.drain(..rows.len() - 1) { + batcher.push_row(row).unwrap(); + } + + std::thread::sleep(flush_duration * 2); + + let row = rows.last().cloned().unwrap(); + batcher.push_row(row).unwrap(); + } + }); + + // Expect all rows except for the last one. + select! { + recv(tables) -> batch => { + let table = batch.unwrap(); + let expected = DataTable::from_rows( + table.table_id, + rows.clone().into_iter().take(rows.len() - 1), + ); + similar_asserts::assert_eq!(expected, table); + } + default(flush_duration * 2) => { + panic!("output channel never yielded any table"); + } + } + + // Expect just the last row. + select! 
{ + recv(tables) -> batch => { + let table = batch.unwrap(); + let expected = DataTable::from_rows( + table.table_id, + rows.last().cloned(), + ); + similar_asserts::assert_eq!(expected, table); + } + default(flush_duration * 4) => { + panic!("output channel never yielded any table"); + } + } + + assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); + + drop(batcher); + + assert_eq!(Err(TryRecvError::Disconnected), tables.try_recv()); + } + + fn create_table() -> DataTable { + use crate::{ + component_types::{ColorRGBA, Label, Point2D}, + Time, + }; + + let timepoint = |frame_nr: i64| { + TimePoint::from([ + (Timeline::new_temporal("log_time"), Time::now().into()), + (Timeline::new_sequence("frame_nr"), frame_nr.into()), + ]) + }; + + let row0 = { + let num_instances = 2; + let points: &[Point2D] = &[[10.0, 10.0].into(), [20.0, 20.0].into()]; + let colors: &[_] = &[ColorRGBA::from_rgb(128, 128, 128)]; + let labels: &[Label] = &[]; + + DataRow::from_cells3( + RowId::random(), + "a", + timepoint(1), + num_instances, + (points, colors, labels), + ) + }; + + let row1 = { + let num_instances = 0; + let colors: &[ColorRGBA] = &[]; + + DataRow::from_cells1(RowId::random(), "b", timepoint(1), num_instances, colors) + }; + + let row2 = { + let num_instances = 1; + let colors: &[_] = &[ColorRGBA::from_rgb(255, 255, 255)]; + let labels: &[_] = &[Label("hey".into())]; + + DataRow::from_cells2( + RowId::random(), + "c", + timepoint(2), + num_instances, + (colors, labels), + ) + }; + + DataTable::from_rows(TableId::ZERO, [row0, row1, row2]) + } +} diff --git a/crates/re_log_types/src/lib.rs b/crates/re_log_types/src/lib.rs index 7775be821e74..dbc635884d20 100644 --- a/crates/re_log_types/src/lib.rs +++ b/crates/re_log_types/src/lib.rs @@ -4,9 +4,6 @@ #![doc = document_features::document_features!()] //! -#[cfg(feature = "arrow_datagen")] -pub mod datagen; - pub mod arrow_msg; mod component; pub mod component_types; @@ -23,17 +20,11 @@ pub mod time_point; mod time_range; mod time_real; -pub mod external { - pub use arrow2; - pub use arrow2_convert; - pub use re_tuid; - - #[cfg(feature = "glam")] - pub use glam; +#[cfg(feature = "arrow_datagen")] +pub mod datagen; - #[cfg(feature = "image")] - pub use image; -} +#[cfg(not(target_arch = "wasm32"))] +mod data_table_batcher; pub use self::arrow_msg::ArrowMsg; pub use self::component::{Component, DeserializableComponent, SerializableComponent}; @@ -61,6 +52,21 @@ pub use self::time_point::{TimeInt, TimePoint, TimeType, Timeline, TimelineName} pub use self::time_range::{TimeRange, TimeRangeF}; pub use self::time_real::TimeReal; +#[cfg(not(target_arch = "wasm32"))] +pub use self::data_table_batcher::{DataTableBatcher, DataTableBatcherConfig}; + +pub mod external { + pub use arrow2; + pub use arrow2_convert; + pub use re_tuid; + + #[cfg(feature = "glam")] + pub use glam; + + #[cfg(feature = "image")] + pub use image; +} + #[macro_export] macro_rules! 
impl_into_enum { ($from_ty: ty, $enum_name: ident, $to_enum_variant: ident) => { From f46ac7201cf10cba1a5952d999e270118b86edc0 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 26 Apr 2023 18:47:19 +0200 Subject: [PATCH 04/31] lints --- crates/re_log_types/src/data_table_batcher.rs | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/crates/re_log_types/src/data_table_batcher.rs b/crates/re_log_types/src/data_table_batcher.rs index e0c085927fd0..16e9555faa36 100644 --- a/crates/re_log_types/src/data_table_batcher.rs +++ b/crates/re_log_types/src/data_table_batcher.rs @@ -1,16 +1,3 @@ -// TODO: debouncing is probably another PR? - -// TODO: logs - -// TODO: why at the table level rather than the chunk level you ask? - -// TODO: we're still inserting row by row on the other end though - -// TODO: we have to be quite specific about per-process/per-thread/per-recording? - -// TODO: strong guarantees for the thread that flushes/shutdowns/drops, weak guarantees for the -// other ones - use std::{ sync::Arc, time::{Duration, Instant}, @@ -413,6 +400,7 @@ fn batching_thread( pending_num_rows: u64, pending_num_bytes: u64, } + impl Accumulator { fn reset(&mut self) { self.latest = Instant::now(); @@ -434,8 +422,8 @@ fn batching_thread( acc: &mut Accumulator, mut row: DataRow, ) -> bool { - // TODO(cmc): now that we're re doing this here, it really is a massive waste not to send - // it over the wire... link issue + // TODO(#1760): now that we're re doing this here, it really is a massive waste not to send + // it over the wire... row.compute_all_size_bytes(); acc.pending_num_rows += 1; @@ -456,7 +444,7 @@ fn batching_thread( re_log::trace!(reason, "flushing tables"); let table = DataTable::from_rows(TableId::random(), rows.drain(..)); - // TODO(cmc): efficient table sorting here, following the same rules as the store's. + // TODO(#1981): efficient table sorting here, following the same rules as the store's. // table.sort(); // NOTE: This can only fail if all receivers have been dropped, which simply cannot happen From 5440f764c9f3082943f61e0e94a2ba0d091f5cd8 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 26 Apr 2023 18:52:50 +0200 Subject: [PATCH 05/31] lints --- crates/re_log_types/src/data_table_batcher.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/crates/re_log_types/src/data_table_batcher.rs b/crates/re_log_types/src/data_table_batcher.rs index 16e9555faa36..df4e6992f1a6 100644 --- a/crates/re_log_types/src/data_table_batcher.rs +++ b/crates/re_log_types/src/data_table_batcher.rs @@ -51,7 +51,7 @@ pub struct DataTableBatcherConfig { /// Flush if the accumulated payload has a number of rows equal or greater than this. pub flush_num_rows: u64, - /// Size of the internal channel of [`DataTableBatcherCmd`]s. + /// Size of the internal channel of commands. /// /// Unbounded if left unspecified. pub max_commands_in_flight: Option, @@ -96,7 +96,7 @@ impl DataTableBatcherConfig { max_tables_in_flight: None, }; - /// Environment variable to configure [`Self::flush_duration`]. + /// Environment variable to configure [`Self::flush_tick`]. pub const ENV_FLUSH_TICK: &str = "RERUN_FLUSH_TICK_SECS"; /// Environment variable to configure [`Self::flush_num_bytes`]. @@ -117,8 +117,7 @@ impl DataTableBatcherConfig { /// Returns a copy of `self`, overriding existing fields with values from the environment if /// they are present. /// - /// See [`Self::ENV_FLUSH_DURATION`], [`Self::ENV_FLUSH_NUM_BYTES`], - /// [`Self::ENV_FLUSH_NUM_BYTES`]. 
+ /// See [`Self::ENV_FLUSH_TICK`], [`Self::ENV_FLUSH_NUM_BYTES`], [`Self::ENV_FLUSH_NUM_BYTES`]. pub fn apply_env(&self) -> DataTableBatcherResult { let mut new = self.clone(); @@ -342,7 +341,7 @@ impl DataTableBatcher { // --- Subscribe to tables --- - /// Returns a _shared_ channel in which are sent the batched [`DataTables`]. + /// Returns a _shared_ channel in which are sent the batched [`DataTable`]s. /// /// Shutting down the batcher will close this channel. /// From c1088c57020d8994a23fbbe3b33ba05b2093fac4 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 26 Apr 2023 19:00:19 +0200 Subject: [PATCH 06/31] self review --- crates/re_log_types/src/data_table_batcher.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/re_log_types/src/data_table_batcher.rs b/crates/re_log_types/src/data_table_batcher.rs index df4e6992f1a6..f9a39096ec75 100644 --- a/crates/re_log_types/src/data_table_batcher.rs +++ b/crates/re_log_types/src/data_table_batcher.rs @@ -327,7 +327,7 @@ impl DataTableBatcher { self.inner.flush_blocking() } - /// Shutdowns the batcher. + /// Initiates an asynchronous shutdown of the batcher. /// /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. /// @@ -335,7 +335,7 @@ impl DataTableBatcher { /// /// This can only fail if called on a `DataTableBatcher` that has already shutdown. #[inline] - pub fn shutdown(&self) -> Result<(), SendError<()>> { + pub fn shutdown_async(&self) -> Result<(), SendError<()>> { self.inner.shutdown() } @@ -458,7 +458,6 @@ fn batching_thread( let Ok(cmd) = cmd else { // All command senders are gone, which can only happen if the // `DataTableBatcher` itself has been dropped. - do_flush_all(&mut acc, &tx_table, "all_dropped"); break; }; From cbf17be498fa4d0e1532252775192a587227969c Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 26 Apr 2023 23:04:48 +0200 Subject: [PATCH 07/31] don't expose shutdown to make errors impossible --- crates/re_log_types/src/data_table_batcher.rs | 91 ++++++------------- 1 file changed, 30 insertions(+), 61 deletions(-) diff --git a/crates/re_log_types/src/data_table_batcher.rs b/crates/re_log_types/src/data_table_batcher.rs index f9a39096ec75..a732312405f3 100644 --- a/crates/re_log_types/src/data_table_batcher.rs +++ b/crates/re_log_types/src/data_table_batcher.rs @@ -3,7 +3,7 @@ use std::{ time::{Duration, Instant}, }; -use crossbeam::channel::{Receiver, SendError, Sender}; +use crossbeam::channel::{Receiver, Sender}; use crate::{DataRow, DataTable, SizeBytes, TableId}; @@ -212,6 +212,9 @@ pub struct DataTableBatcher { // NOTE: The receiving end of the command stream as well as the sending end of the table stream are // owned solely by the batching thread. struct DataTableBatcherInner { + /// The one and only entrypoint into the pipeline: this is _never_ cloned nor publicly exposed, + /// therefore the `Drop` implementation is guaranteed that no more data can come in while it's + /// running. tx_cmds: Sender, rx_tables: Receiver, cmds_to_tables_handle: Option>, @@ -219,7 +222,9 @@ struct DataTableBatcherInner { impl Drop for DataTableBatcherInner { fn drop(&mut self) { - self.shutdown().ok(); // can only fail if already down + // NOTE: The command channel is private, if we're here, nothing is currently capable of + // sending data down the pipeline. 
+ self.tx_cmds.send(Command::Shutdown).ok(); if let Some(handle) = self.cmds_to_tables_handle.take() { handle.join().ok(); } @@ -292,51 +297,26 @@ impl DataTableBatcher { /// This will call [`DataRow::compute_all_size_bytes`] from the batching thread! /// /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. - /// - /// ## Failure - /// - /// This can only fail if called on a `DataTableBatcher` that has already shutdown. - /// The data can be retrieved from the returned error value in such cases. #[inline] - pub fn push_row(&self, row: DataRow) -> Result<(), SendError> { - self.inner.push_row(row) + pub fn push_row(&self, row: DataRow) { + self.inner.push_row(row); } /// Initiates a flush of the pipeline and returns immediately. /// /// This does **not** wait for the flush to propagate (see [`Self::flush_blocking`]). /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. - /// - /// ## Failure - /// - /// This can only fail if called on a `DataTableBatcher` that has already shutdown. #[inline] - pub fn flush_async(&self) -> Result<(), SendError<()>> { - self.inner.flush_and_forget() + pub fn flush_async(&self) { + self.inner.flush_async(); } /// Initiates a flush the batching pipeline and waits for it to propagate. /// /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. - /// - /// ## Failure - /// - /// This can only fail if called on a `DataTableBatcher` that has already shutdown. #[inline] - pub fn flush_blocking(&self) -> Result<(), SendError<()>> { - self.inner.flush_blocking() - } - - /// Initiates an asynchronous shutdown of the batcher. - /// - /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. - /// - /// ## Failure - /// - /// This can only fail if called on a `DataTableBatcher` that has already shutdown. - #[inline] - pub fn shutdown_async(&self) -> Result<(), SendError<()>> { - self.inner.shutdown() + pub fn flush_blocking(&self) { + self.inner.flush_blocking(); } // --- Subscribe to tables --- @@ -352,36 +332,25 @@ impl DataTableBatcher { } impl DataTableBatcherInner { - fn push_row(&self, row: DataRow) -> Result<(), SendError> { - self.send_cmd(Command::AppendRow(row)) - .map_err(|err| match err.0 { - Command::AppendRow(row) => SendError(row), - _ => unreachable!(), - }) + fn push_row(&self, row: DataRow) { + self.send_cmd(Command::AppendRow(row)); } - fn flush_and_forget(&self) -> Result<(), SendError<()>> { + fn flush_async(&self) { let (flush_cmd, _) = Command::flush(); - self.send_cmd(flush_cmd).map_err(|_err| SendError(()))?; - Ok(()) + self.send_cmd(flush_cmd); } - fn flush_blocking(&self) -> Result<(), SendError<()>> { + fn flush_blocking(&self) { let (flush_cmd, oneshot) = Command::flush(); - self.send_cmd(flush_cmd).map_err(|_err| SendError(()))?; + self.send_cmd(flush_cmd); oneshot.recv().ok(); - Ok(()) - } - - fn shutdown(&self) -> Result<(), SendError<()>> { - self.flush_and_forget()?; - self.send_cmd(Command::Shutdown) - .map_err(|_err| SendError(()))?; - Ok(()) } - fn send_cmd(&self, cmd: Command) -> Result<(), SendError> { - self.tx_cmds.send(cmd) + fn send_cmd(&self, cmd: Command) { + // NOTE: Internal channels can never be closed outside of the `Drop` impl, this cannot + // fail. 
+ self.tx_cmds.send(cmd).ok(); } } @@ -514,12 +483,12 @@ mod tests { assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); for row in expected.to_rows() { - batcher.push_row(row).unwrap(); + batcher.push_row(row); } assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); - batcher.flush_blocking().unwrap(); + batcher.flush_blocking(); { let mut table = tables.recv().unwrap(); @@ -548,7 +517,7 @@ mod tests { assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); for row in rows.clone() { - batcher.push_row(row).unwrap(); + batcher.push_row(row); } assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); @@ -601,7 +570,7 @@ mod tests { assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); for row in table.to_rows() { - batcher.push_row(row).unwrap(); + batcher.push_row(row); } // Expect all rows except for the last one (num_bytes trigger). @@ -660,7 +629,7 @@ mod tests { assert_eq!(Err(TryRecvError::Empty), tables.try_recv()); for row in table.to_rows() { - batcher.push_row(row).unwrap(); + batcher.push_row(row); } // Expect all rows except for the last one. @@ -721,13 +690,13 @@ mod tests { let batcher = batcher.clone(); move || { for row in rows.drain(..rows.len() - 1) { - batcher.push_row(row).unwrap(); + batcher.push_row(row); } std::thread::sleep(flush_duration * 2); let row = rows.last().cloned().unwrap(); - batcher.push_row(row).unwrap(); + batcher.push_row(row); } }); From e7b42bfd70f01d71b8df275935b715273fdd3e4a Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 26 Apr 2023 23:12:47 +0200 Subject: [PATCH 08/31] doc --- crates/re_log_types/src/data_table_batcher.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/crates/re_log_types/src/data_table_batcher.rs b/crates/re_log_types/src/data_table_batcher.rs index a732312405f3..64ceb71599ec 100644 --- a/crates/re_log_types/src/data_table_batcher.rs +++ b/crates/re_log_types/src/data_table_batcher.rs @@ -195,12 +195,8 @@ fn data_table_batcher_config() { /// /// ## Shutdown /// -/// The batcher can be shutdown either by dropping all instances of it, or simply by calling -/// [`Self::shutdown`]. -/// -/// The pipeline will first atomically close input channels, then flush, then finally shutdown. -/// Threads other than the caller that might be in the process of sending data will be greeted with -/// a `SendError` and an opportunity to retrieve the data before it gets lost. +/// The batcher can only be shutdown dropping all instances of it, at which point it will +/// automatically take care of flushing any pending data that might remain in the pipeline. /// /// Shutting down cannot ever block, unless the output channel is bounded and happens to be full /// (see [`DataTableBatcherConfig::max_tables_in_flight`]). From 573de98a9858e9a3a61e88fe1d5bf11a65ba5c32 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Thu, 27 Apr 2023 00:12:27 +0200 Subject: [PATCH 09/31] backport --- crates/re_log_types/src/data_table_batcher.rs | 4 ++-- crates/re_log_types/src/lib.rs | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/crates/re_log_types/src/data_table_batcher.rs b/crates/re_log_types/src/data_table_batcher.rs index 64ceb71599ec..b30db761ff3c 100644 --- a/crates/re_log_types/src/data_table_batcher.rs +++ b/crates/re_log_types/src/data_table_batcher.rs @@ -17,14 +17,14 @@ pub enum DataTableBatcherError { ParseConfig { name: &'static str, value: String, - err: Box, + err: Box, }, /// Error spawning one of the background threads. 
#[error("Failed to spawn background thread '{name}': {err}")] SpawnThread { name: &'static str, - err: Box, + err: Box, }, } diff --git a/crates/re_log_types/src/lib.rs b/crates/re_log_types/src/lib.rs index dbc635884d20..1171550e3bf1 100644 --- a/crates/re_log_types/src/lib.rs +++ b/crates/re_log_types/src/lib.rs @@ -53,7 +53,9 @@ pub use self::time_range::{TimeRange, TimeRangeF}; pub use self::time_real::TimeReal; #[cfg(not(target_arch = "wasm32"))] -pub use self::data_table_batcher::{DataTableBatcher, DataTableBatcherConfig}; +pub use self::data_table_batcher::{ + DataTableBatcher, DataTableBatcherConfig, DataTableBatcherError, +}; pub mod external { pub use arrow2; From 67dc616ed59221e23555e0aabfdeb37d0afd1480 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Thu, 27 Apr 2023 11:10:58 +0200 Subject: [PATCH 10/31] backport --- crates/re_log_types/src/data_table_batcher.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/re_log_types/src/data_table_batcher.rs b/crates/re_log_types/src/data_table_batcher.rs index b30db761ff3c..097d7de88659 100644 --- a/crates/re_log_types/src/data_table_batcher.rs +++ b/crates/re_log_types/src/data_table_batcher.rs @@ -179,16 +179,16 @@ fn data_table_batcher_config() { // --- /// Implements an asynchronous batcher that coalesces [`DataRow`]s into [`DataTable`]s based upon -/// the threshold defined in the associated [`DataTableBatcherConfig`]. +/// the thresholds defined in the associated [`DataTableBatcherConfig`]. /// /// ## Multithreading and ordering /// -/// `DataTableBatcher` can be cheaply clone and used freely across any number of threads. +/// [`DataTableBatcher`] can be cheaply clone and used freely across any number of threads. /// /// Internally, all operations are linearized into a pipeline: /// - All operations sent by a given thread will take effect in the same exact order as that -/// thread originally sent them in from its point of view. -/// - There is no defined global ordering across multiple threads. +/// thread originally sent them in, from its point of view. +/// - There isn't any well defined global order across multiple threads. /// /// This means that e.g. flushing the pipeline ([`Self::flush_blocking`]) guarantees that all /// previous data sent by the calling thread has been batched; no more, no less. @@ -242,7 +242,7 @@ impl Command { } impl DataTableBatcher { - /// Creates a new `DataTableBatcher` using the passed in `config`. + /// Creates a new [`DataTableBatcher`] using the passed in `config`. /// /// The returned object must be kept in scope: dropping it will trigger a clean shutdown of the /// batcher. @@ -292,7 +292,7 @@ impl DataTableBatcher { /// /// This will call [`DataRow::compute_all_size_bytes`] from the batching thread! /// - /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. + /// See [`DataTableBatcher`] docs for ordering semantics and multithreading guarantees. #[inline] pub fn push_row(&self, row: DataRow) { self.inner.push_row(row); @@ -301,7 +301,7 @@ impl DataTableBatcher { /// Initiates a flush of the pipeline and returns immediately. /// /// This does **not** wait for the flush to propagate (see [`Self::flush_blocking`]). - /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. + /// See [`DataTableBatcher`] docs for ordering semantics and multithreading guarantees. 
#[inline] pub fn flush_async(&self) { self.inner.flush_async(); @@ -309,7 +309,7 @@ impl DataTableBatcher { /// Initiates a flush the batching pipeline and waits for it to propagate. /// - /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. + /// See [`DataTableBatcher`] docs for ordering semantics and multithreading guarantees. #[inline] pub fn flush_blocking(&self) { self.inner.flush_blocking(); @@ -321,7 +321,7 @@ impl DataTableBatcher { /// /// Shutting down the batcher will close this channel. /// - /// See `DataTableBatcher` docs for ordering semantics and multithreading guarantees. + /// See [`DataTableBatcher`] docs for ordering semantics and multithreading guarantees. pub fn tables(&self) -> Receiver { self.inner.rx_tables.clone() } From 14130c57a37570de8f2c82816bc39b99fb863324 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Thu, 27 Apr 2023 18:29:35 +0200 Subject: [PATCH 11/31] introduce RecordingStream --- Cargo.lock | 11 + crates/re_sdk/Cargo.toml | 1 + crates/re_sdk/src/global.rs | 23 - crates/re_sdk/src/lib.rs | 16 +- crates/re_sdk/src/log_sink.rs | 96 +-- crates/re_sdk/src/msg_sender.rs | 47 +- crates/re_sdk/src/recording_stream.rs | 755 ++++++++++++++++++++++ crates/re_sdk/src/session.rs | 320 --------- crates/rerun/src/clap.rs | 28 +- crates/rerun/src/lib.rs | 24 +- crates/rerun/src/native_viewer.rs | 33 +- examples/rust/api_demo/src/main.rs | 150 ++--- examples/rust/dna/src/main.rs | 18 +- examples/rust/minimal/src/main.rs | 10 +- examples/rust/minimal_options/src/main.rs | 10 +- examples/rust/objectron/src/main.rs | 55 +- examples/rust/raw_mesh/src/main.rs | 22 +- rerun_py/src/python_session.rs | 6 +- tests/rust/test_image_memory/src/main.rs | 10 +- 19 files changed, 1031 insertions(+), 604 deletions(-) delete mode 100644 crates/re_sdk/src/global.rs create mode 100644 crates/re_sdk/src/recording_stream.rs delete mode 100644 crates/re_sdk/src/session.rs diff --git a/Cargo.lock b/Cargo.lock index dabac16c4dc7..af835da7e69e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -763,6 +763,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "clock" +version = "0.6.0-alpha.0" +dependencies = [ + "anyhow", + "clap 4.1.4", + "glam", + "rerun", +] + [[package]] name = "cocoa" version = "0.24.1" @@ -4061,6 +4071,7 @@ name = "re_sdk" version = "0.6.0-alpha.0" dependencies = [ "arrow2_convert", + "crossbeam", "document-features", "ndarray", "ndarray-rand", diff --git a/crates/re_sdk/Cargo.toml b/crates/re_sdk/Cargo.toml index 018bc298a43b..635654865e0c 100644 --- a/crates/re_sdk/Cargo.toml +++ b/crates/re_sdk/Cargo.toml @@ -39,6 +39,7 @@ re_log.workspace = true re_memory.workspace = true re_sdk_comms = { workspace = true, features = ["client"] } +crossbeam.workspace = true document-features = "0.2" parking_lot.workspace = true thiserror.workspace = true diff --git a/crates/re_sdk/src/global.rs b/crates/re_sdk/src/global.rs deleted file mode 100644 index 9e4d6d398a3c..000000000000 --- a/crates/re_sdk/src/global.rs +++ /dev/null @@ -1,23 +0,0 @@ -use crate::session::Session; - -/// Access a global [`Session`] singleton for convenient logging. 
-/// -/// The default [`Session`] is a disabled dummy-session that ignore all log calls, -/// so you need to explicitly set the global session for it to be useful -/// -/// Example usage: -/// -/// ``` -/// use re_sdk::{global_session, SessionBuilder, default_server_addr}; -/// -/// *global_session() = SessionBuilder::new("my_app").connect(default_server_addr()); -/// -/// // Call code that logs using `global_session`. -/// ``` -pub fn global_session() -> parking_lot::MutexGuard<'static, Session> { - use once_cell::sync::OnceCell; - use parking_lot::Mutex; - static INSTANCE: OnceCell> = OnceCell::new(); - let mutex = INSTANCE.get_or_init(|| Mutex::new(Session::disabled())); - mutex.lock() -} diff --git a/crates/re_sdk/src/lib.rs b/crates/re_sdk/src/lib.rs index a7e7fe27607f..ac5014fab40b 100644 --- a/crates/re_sdk/src/lib.rs +++ b/crates/re_sdk/src/lib.rs @@ -9,21 +9,15 @@ // ---------------- // Private modules: -#[cfg(feature = "global_session")] -mod global; - mod log_sink; mod msg_sender; -mod session; +mod recording_stream; // ------------- // Public items: -#[cfg(feature = "global_session")] -pub use self::global::global_session; - pub use self::msg_sender::{MsgSender, MsgSenderError}; -pub use self::session::{Session, SessionBuilder}; +pub use self::recording_stream::{RecordingStream, RecordingStreamBuilder}; pub use re_sdk_comms::default_server_addr; @@ -49,9 +43,7 @@ pub mod demo_util; /// This is how you select whether the log stream ends up /// sent over TCP, written to file, etc. pub mod sink { - pub use crate::log_sink::{ - disabled, BufferedSink, LogSink, MemorySink, MemorySinkStorage, TcpSink, - }; + pub use crate::log_sink::{BufferedSink, LogSink, MemorySink, MemorySinkStorage, TcpSink}; #[cfg(not(target_arch = "wasm32"))] pub use re_log_encoding::{FileSink, FileSinkError}; @@ -153,7 +145,7 @@ pub fn decide_logging_enabled(default_enabled: bool) -> bool { // ---------------------------------------------------------------------------- -/// Creates a new [`re_log_types::RecordingInfo`] which can be used with [`Session::new`]. +/// Creates a new [`re_log_types::RecordingInfo`] which can be used with [`RecordingStream::new`]. #[track_caller] // track_caller so that we can see if we are being called from an official example. pub fn new_recording_info( application_id: impl Into, diff --git a/crates/re_sdk/src/log_sink.rs b/crates/re_sdk/src/log_sink.rs index 639c682aaf21..7a1d9b751813 100644 --- a/crates/re_sdk/src/log_sink.rs +++ b/crates/re_sdk/src/log_sink.rs @@ -1,3 +1,6 @@ +use std::sync::Arc; + +use parking_lot::RwLock; use re_log_types::LogMsg; /// Where the SDK sends its log messages. @@ -6,6 +9,7 @@ pub trait LogSink: Send + Sync + 'static { fn send(&self, msg: LogMsg); /// Send all these log messages. + #[inline] fn send_all(&self, messages: Vec) { for msg in messages { self.send(msg); @@ -13,40 +17,23 @@ pub trait LogSink: Send + Sync + 'static { } /// Drain all buffered [`LogMsg`]es and return them. + /// + /// Only applies to sinks that maintain a backlog. + #[inline] fn drain_backlog(&self) -> Vec { vec![] } - /// Wait until all logged data have been sent to the remove server (if any). - fn flush(&self) {} - - /// If the TCP session is disconnected, allow it to quit early and drop unsent messages. - fn drop_msgs_if_disconnected(&self) {} - - /// Returns `false` if this sink just discards all messages. 
- fn is_enabled(&self) -> bool { - true - } -} - -// ---------------------------------------------------------------------------- - -struct DisabledSink; - -impl LogSink for DisabledSink { - fn send(&self, _msg: LogMsg) { - // It's intended that the logging SDK should drop messages earlier than this if logging is disabled. - re_log::debug_once!("Logging is disabled, dropping message(s)."); - } - - fn is_enabled(&self) -> bool { - false - } -} + /// Blocks until all pending data in the sink's send buffers has been fully flushed. + /// + /// See also [`LogSink::drop_if_disconnected`]. + #[inline] + fn flush_blocking(&self) {} -/// A sink that does nothing. All log messages are just dropped. -pub fn disabled() -> Box { - Box::new(DisabledSink) + /// Drops all pending data currently sitting in the sink's send buffers if it is unable to + /// flush it for any reason (e.g. a broken TCP connection for a [`TcpSink`]). + #[inline] + fn drop_if_disconnected(&self) {} } // ---------------------------------------------------------------------------- @@ -57,26 +44,30 @@ pub struct BufferedSink(parking_lot::Mutex>); impl BufferedSink { /// An empty buffer. + #[inline] pub fn new() -> Self { Self::default() } } impl LogSink for BufferedSink { + #[inline] fn send(&self, msg: LogMsg) { self.0.lock().push(msg); } + #[inline] fn send_all(&self, mut messages: Vec) { self.0.lock().append(&mut messages); } + #[inline] fn drain_backlog(&self) -> Vec { std::mem::take(&mut self.0.lock()) } } -/// Store log messages directly in memory +/// Store log messages directly in memory. /// /// Although very similar to `BufferedSink` this sink is a real-endpoint. When creating /// a new sink the logged messages stay with the `MemorySink` (`drain_backlog` does nothing). @@ -88,37 +79,52 @@ pub struct MemorySink(MemorySinkStorage); impl MemorySink { /// Access the raw `MemorySinkStorage` + #[inline] pub fn buffer(&self) -> MemorySinkStorage { self.0.clone() } } impl LogSink for MemorySink { + #[inline] fn send(&self, msg: LogMsg) { - self.0.lock().push(msg); + self.0.write().push(msg); } + #[inline] fn send_all(&self, mut messages: Vec) { - self.0.lock().append(&mut messages); + self.0.write().append(&mut messages); } } -/// The storage used by [`MemorySink`] +/// The storage used by [`MemorySink`]. #[derive(Default, Clone)] -pub struct MemorySinkStorage(std::sync::Arc>>); +pub struct MemorySinkStorage(Arc>>); -/// impl MemorySinkStorage { - /// Lock the contained buffer - fn lock(&self) -> parking_lot::MutexGuard<'_, Vec> { - self.0.lock() + /// Write access to the inner array of [`LogMsg`]. + #[inline] + fn write(&self) -> parking_lot::RwLockWriteGuard<'_, Vec> { + self.0.write() + } + + /// Read access to the inner array of [`LogMsg`]. + #[inline] + pub fn read(&self) -> parking_lot::RwLockReadGuard<'_, Vec> { + self.0.read() + } + + /// Consumes and returns the inner array of [`LogMsg`]. + #[inline] + pub fn take(&self) -> Vec { + std::mem::take(&mut *self.0.write()) } - /// Convert the stored messages into an in-memory Rerun log file + /// Convert the stored messages into an in-memory Rerun log file. 
+ #[inline] pub fn rrd_as_bytes(&self) -> Result, re_log_encoding::encoder::EncodeError> { - let messages = self.lock(); let mut buffer = std::io::Cursor::new(Vec::new()); - re_log_encoding::encoder::encode(messages.iter(), &mut buffer)?; + re_log_encoding::encoder::encode(self.read().iter(), &mut buffer)?; Ok(buffer.into_inner()) } } @@ -133,6 +139,7 @@ pub struct TcpSink { impl TcpSink { /// Connect to the given address in a background thread. /// Retries until successful. + #[inline] pub fn new(addr: std::net::SocketAddr) -> Self { Self { client: re_sdk_comms::Client::new(addr), @@ -141,15 +148,18 @@ impl TcpSink { } impl LogSink for TcpSink { + #[inline] fn send(&self, msg: LogMsg) { self.client.send(msg); } - fn flush(&self) { + #[inline] + fn flush_blocking(&self) { self.client.flush(); } - fn drop_msgs_if_disconnected(&self) { + #[inline] + fn drop_if_disconnected(&self) { self.client.drop_if_disconnected(); } } diff --git a/crates/re_sdk/src/msg_sender.rs b/crates/re_sdk/src/msg_sender.rs index 0bd6832a3f2f..2269b57d1fc6 100644 --- a/crates/re_sdk/src/msg_sender.rs +++ b/crates/re_sdk/src/msg_sender.rs @@ -1,19 +1,19 @@ -use std::borrow::Borrow; - -use re_log_types::{component_types::InstanceKey, DataRow, DataTableError, RecordingId, RowId}; +use re_log_types::{component_types::InstanceKey, DataRow, DataTableError, RowId}; use crate::{ components::Transform, - log::{DataCell, LogMsg}, - sink::LogSink, + log::DataCell, time::{Time, TimeInt, TimePoint, Timeline}, - Component, EntityPath, SerializableComponent, Session, + Component, EntityPath, RecordingStream, SerializableComponent, }; // TODO(#1619): Rust SDK batching // --- +// TODO: effectively this is nothing but a rowbuilder? except sometimes it creates table due to +// limitations.. + /// Errors that can occur when constructing or sending messages /// using [`MsgSender`]. #[derive(thiserror::Error, Debug)] @@ -35,7 +35,7 @@ pub enum MsgSenderError { /// /// ```ignore /// fn log_coordinate_space( -/// session: &Session, +/// rec_stream: &RecordingStream, /// ent_path: impl Into, /// axes: &str, /// ) -> anyhow::Result<()> { @@ -46,12 +46,13 @@ pub enum MsgSenderError { /// MsgSender::new(ent_path) /// .with_timeless(true) /// .with_component(&[view_coords])? -/// .send(session) +/// .send(rec_stream) /// .map_err(Into::into) /// } /// ``` // TODO(#1619): this whole thing needs to be rethought to incorporate batching and datatables. pub struct MsgSender { + // TODO // TODO(cmc): At the moment, a `MsgBundle` can only contain data for a single entity, so // this must be known as soon as we spawn the builder. // This won't be true anymore once batch insertions land. @@ -231,42 +232,24 @@ impl MsgSender { /// Consumes, packs, sanity checks and finally sends the message to the currently configured /// target of the SDK. - pub fn send(self, session: &Session) -> Result<(), DataTableError> { - self.send_to_sink(session.recording_id(), session.borrow()) - } - - /// Consumes, packs, sanity checks and finally sends the message to the currently configured - /// target of the SDK. - fn send_to_sink( - self, - recording_id: RecordingId, - sink: &dyn LogSink, - ) -> Result<(), DataTableError> { - if !sink.is_enabled() { + // TODO: wtf is this a DataTableError? 
+    pub fn send(self, rec_stream: &RecordingStream) -> Result<(), DataTableError> {
+        if !rec_stream.is_enabled() {
             return Ok(()); // silently drop the message
         }
 
         let [row_standard, row_transforms, row_splats] = self.into_rows();
 
         if let Some(row_transforms) = row_transforms {
-            sink.send(LogMsg::ArrowMsg(
-                recording_id,
-                row_transforms.into_table().to_arrow_msg()?,
-            ));
+            rec_stream.record_row(row_transforms);
         }
         if let Some(row_splats) = row_splats {
-            sink.send(LogMsg::ArrowMsg(
-                recording_id,
-                row_splats.into_table().to_arrow_msg()?,
-            ));
+            rec_stream.record_row(row_splats);
         }
         // Always the primary component last so range-based queries will include the other data.
         // Since the primary component can't be splatted it must be in msg_standard, see(#1215).
         if let Some(row_standard) = row_standard {
-            sink.send(LogMsg::ArrowMsg(
-                recording_id,
-                row_standard.into_table().to_arrow_msg()?,
-            ));
+            rec_stream.record_row(row_standard);
         }
 
         Ok(())
diff --git a/crates/re_sdk/src/recording_stream.rs b/crates/re_sdk/src/recording_stream.rs
new file mode 100644
index 000000000000..3c156ebb5501
--- /dev/null
+++ b/crates/re_sdk/src/recording_stream.rs
@@ -0,0 +1,755 @@
+use std::sync::Arc;
+
+use crossbeam::channel::{Receiver, Sender};
+use re_log_types::{
+    ApplicationId, DataRow, DataTable, DataTableBatcher, DataTableBatcherConfig,
+    DataTableBatcherError, LogMsg, RecordingId, RecordingInfo, RecordingSource, Time,
+};
+
+use crate::sink::{LogSink, MemorySinkStorage};
+
+// ---
+
+/// Errors that can occur when creating/manipulating a [`RecordingStream`].
+#[derive(thiserror::Error, Debug)]
+pub enum RecordingStreamError {
+    /// Error spawning one of the background threads.
+    #[error("Failed to spawn the underlying batcher: {0}")]
+    DataTableBatcher(#[from] DataTableBatcherError),
+
+    /// Error spawning one of the background threads.
+    #[error("Failed to spawn background thread '{name}': {err}")]
+    SpawnThread {
+        name: &'static str,
+        err: Box<dyn std::error::Error + Send + Sync>,
+    },
+}
+
+pub type RecordingStreamResult<T> = Result<T, RecordingStreamError>;
+
+// ---
+
+/// Construct a [`RecordingStream`].
+///
+/// ``` no_run
+/// # use re_sdk::RecordingStreamBuilder;
+/// let rec_stream = RecordingStreamBuilder::new("my_app").save("my_recording.rrd")?;
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+pub struct RecordingStreamBuilder {
+    application_id: ApplicationId,
+    recording_id: Option<RecordingId>,
+    recording_source: Option<RecordingSource>,
+
+    default_enabled: bool,
+    enabled: Option<bool>,
+
+    batcher_config: Option<DataTableBatcherConfig>,
+
+    is_official_example: bool,
+}
+
+impl RecordingStreamBuilder {
+    /// Create a new [`RecordingStreamBuilder`] with the given [`ApplicationId`].
+    ///
+    /// The [`ApplicationId`] is usually the name of your app.
+    ///
+    /// ```no_run
+    /// # use re_sdk::RecordingStreamBuilder;
+    /// let rec_stream = RecordingStreamBuilder::new("my_app").save("my_recording.rrd")?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    //
+    // NOTE: track_caller so that we can see if we are being called from an official example.
+    #[track_caller]
+    pub fn new(application_id: impl Into<ApplicationId>) -> Self {
+        let application_id = application_id.into();
+        let is_official_example = crate::called_from_official_rust_example();
+
+        Self {
+            application_id,
+            recording_id: None,
+            recording_source: None,
+
+            default_enabled: true,
+            enabled: None,
+
+            batcher_config: None,
+            is_official_example,
+        }
+    }
+
+    /// Set whether or not Rerun is enabled by default.
+    ///
+    /// If the `RERUN` environment variable is set, it will override this.
+    ///
+    /// See also: [`Self::enabled`].
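+    ///
+    /// ```no_run
+    /// // Sketch: opt out of logging unless the `RERUN` environment variable says otherwise.
+    /// let rec_stream = re_sdk::RecordingStreamBuilder::new("my_app")
+    ///     .default_enabled(false)
+    ///     .buffered()?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```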
+    pub fn default_enabled(mut self, default_enabled: bool) -> Self {
+        self.default_enabled = default_enabled;
+        self
+    }
+
+    /// Set whether or not Rerun is enabled.
+    ///
+    /// Setting this will ignore the `RERUN` environment variable.
+    ///
+    /// See also: [`Self::default_enabled`].
+    pub fn enabled(mut self, enabled: bool) -> Self {
+        self.enabled = Some(enabled);
+        self
+    }
+
+    /// Set the [`RecordingId`] for this context.
+    ///
+    /// If you're logging from multiple processes and want all the messages to end up as the same
+    /// recording, you must make sure they all set the same [`RecordingId`] using this function.
+    ///
+    /// Note that many recordings can share the same [`ApplicationId`], but they all have
+    /// unique [`RecordingId`]s.
+    ///
+    /// The default is to use a random [`RecordingId`].
+    pub fn recording_id(mut self, recording_id: RecordingId) -> Self {
+        self.recording_id = Some(recording_id);
+        self
+    }
+
+    /// Specifies the configuration of the internal data batching mechanism.
+    ///
+    /// See [`DataTableBatcher`] & [`DataTableBatcherConfig`] for more information.
+    pub fn batcher_config(mut self, config: DataTableBatcherConfig) -> Self {
+        self.batcher_config = Some(config);
+        self
+    }
+
+    #[doc(hidden)]
+    pub fn recording_source(mut self, recording_source: RecordingSource) -> Self {
+        self.recording_source = Some(recording_source);
+        self
+    }
+
+    #[doc(hidden)]
+    pub fn is_official_example(mut self, is_official_example: bool) -> Self {
+        self.is_official_example = is_official_example;
+        self
+    }
+
+    /// Creates a new [`RecordingStream`] that starts in a buffering state (RAM).
+    ///
+    /// ## Example
+    ///
+    /// ```no_run
+    /// let rec_stream = re_sdk::RecordingStreamBuilder::new("my_app").buffered();
+    /// ```
+    pub fn buffered(self) -> RecordingStreamResult<RecordingStream> {
+        let (enabled, recording_info, batcher_config) = self.finalize();
+        if enabled {
+            RecordingStream::new(
+                recording_info,
+                batcher_config,
+                Box::new(crate::log_sink::BufferedSink::new()),
+            )
+        } else {
+            re_log::debug!("Rerun disabled - call to buffered() ignored");
+            Ok(RecordingStream::disabled())
+        }
+    }
+
+    /// Creates a new [`RecordingStream`] that is pre-configured to stream the data through to a
+    /// [`crate::log_sink::MemorySink`].
+    ///
+    /// ## Example
+    ///
+    /// ```no_run
+    /// let (rec_stream, storage) = re_sdk::RecordingStreamBuilder::new("my_app").memory().unwrap();
+    /// ```
+    pub fn memory(
+        self,
+    ) -> RecordingStreamResult<(RecordingStream, crate::log_sink::MemorySinkStorage)> {
+        let sink = crate::log_sink::MemorySink::default();
+        let storage = sink.buffer();
+
+        let (enabled, recording_info, batcher_config) = self.finalize();
+        if enabled {
+            RecordingStream::new(recording_info, batcher_config, Box::new(sink))
+                .map(|rec_stream| (rec_stream, storage))
+        } else {
+            re_log::debug!("Rerun disabled - call to memory() ignored");
+            Ok((RecordingStream::disabled(), Default::default()))
+        }
+    }
+
+    /// Creates a new [`RecordingStream`] that is pre-configured to stream the data through to a
+    /// remote Rerun instance.
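+    ///
+    /// The underlying [`crate::log_sink::TcpSink`] connects and retries in a background thread,
+    /// so this call does not wait for the connection to be established.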
+    ///
+    /// ## Example
+    ///
+    /// ```no_run
+    /// let rec_stream = re_sdk::RecordingStreamBuilder::new("my_app")
+    ///     .connect(re_sdk::default_server_addr());
+    /// ```
+    pub fn connect(self, addr: std::net::SocketAddr) -> RecordingStreamResult<RecordingStream> {
+        let (enabled, recording_info, batcher_config) = self.finalize();
+        if enabled {
+            RecordingStream::new(
+                recording_info,
+                batcher_config,
+                Box::new(crate::log_sink::TcpSink::new(addr)),
+            )
+        } else {
+            re_log::debug!("Rerun disabled - call to connect() ignored");
+            Ok(RecordingStream::disabled())
+        }
+    }
+
+    /// Creates a new [`RecordingStream`] that is pre-configured to stream the data through to an
+    /// RRD file on disk.
+    ///
+    /// ## Example
+    ///
+    /// ```no_run
+    /// let rec_stream = re_sdk::RecordingStreamBuilder::new("my_app").save("my_recording.rrd")?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    #[cfg(not(target_arch = "wasm32"))]
+    pub fn save(
+        self,
+        path: impl Into<std::path::PathBuf>,
+    ) -> RecordingStreamResult<RecordingStream> {
+        let (enabled, recording_info, batcher_config) = self.finalize();
+
+        if enabled {
+            RecordingStream::new(
+                recording_info,
+                batcher_config,
+                Box::new(crate::sink::FileSink::new(path).unwrap()), // TODO
+            )
+        } else {
+            re_log::debug!("Rerun disabled - call to save() ignored");
+            Ok(RecordingStream::disabled())
+        }
+    }
+
+    /// Returns whether or not logging is enabled, plus a [`RecordingInfo`].
+    ///
+    /// This can be used to then construct a [`RecordingStream`] manually using
+    /// [`RecordingStream::new`].
+    pub fn finalize(self) -> (bool, RecordingInfo, DataTableBatcherConfig) {
+        let Self {
+            application_id,
+            recording_id,
+            recording_source,
+            default_enabled,
+            enabled,
+            batcher_config,
+            is_official_example,
+        } = self;
+
+        let enabled = enabled.unwrap_or_else(|| crate::decide_logging_enabled(default_enabled));
+        let recording_id = recording_id.unwrap_or_else(RecordingId::random);
+        let recording_source = recording_source.unwrap_or_else(|| RecordingSource::RustSdk {
+            rustc_version: env!("RE_BUILD_RUSTC_VERSION").into(),
+            llvm_version: env!("RE_BUILD_LLVM_VERSION").into(),
+        });
+
+        let recording_info = RecordingInfo {
+            application_id,
+            recording_id,
+            is_official_example,
+            started: Time::now(),
+            recording_source,
+        };
+
+        let batcher_config = batcher_config
+            .unwrap_or_else(|| DataTableBatcherConfig::from_env().unwrap_or_default());
+
+        (enabled, recording_info, batcher_config)
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+/// A [`RecordingStream`] handles everything related to logging data into Rerun.
+///
+/// You can construct a new [`RecordingStream`] using [`RecordingStreamBuilder`] or
+/// [`RecordingStream::new`].
+///
+/// ## Sinks
+///
+/// Data is logged into Rerun via [`LogSink`]s.
+///
+/// The underlying [`LogSink`] for a [`RecordingStream`] can be changed at any point during its
+/// lifetime by calling [`RecordingStream::set_sink`] or one of the higher level helpers
+/// ([`RecordingStream::connect`], [`RecordingStream::memory`],
+/// [`RecordingStream::save`], [`RecordingStream::disconnect`]).
+///
+/// ## Multithreading and ordering
+///
+/// [`RecordingStream`] can be cheaply cloned and used freely across any number of threads.
+///
+/// Internally, all operations are linearized into a pipeline:
+/// - All operations sent by a given thread will take effect in the same exact order as that
+///   thread originally sent them in, from its point of view.
+/// - There isn't any well-defined global order across multiple threads.
+///
+/// This means that e.g. flushing the pipeline ([`Self::flush_blocking`]) guarantees that all
+/// previous data sent by the calling thread has been recorded; no more, no less.
+///
+/// ## Shutdown
+///
+/// The [`RecordingStream`] can only be shut down by dropping all instances of it, at which point
+/// it will automatically take care of flushing any pending data that might remain in the pipeline.
+///
+/// Shutting down cannot ever block.
+#[derive(Clone)]
+pub struct RecordingStream {
+    inner: Arc<Option<RecordingStreamInner>>,
+}
+
+struct RecordingStreamInner {
+    info: RecordingInfo,
+
+    /// The one and only entrypoint into the pipeline: this is _never_ cloned nor publicly exposed,
+    /// therefore the `Drop` implementation is guaranteed that no more data can come in while it's
+    /// running.
+    cmds_tx: Sender<Command>,
+
+    batcher: DataTableBatcher,
+    batcher_to_sink_handle: Option<std::thread::JoinHandle<()>>,
+    //
+    // TODO(emilk): add convenience `TimePoint` here so that users can
+    // do things like `session.set_time_sequence("frame", frame_idx);`
+}
+
+impl Drop for RecordingStreamInner {
+    fn drop(&mut self) {
+        // NOTE: The command channel is private, if we're here, nothing is currently capable of
+        // sending data down the pipeline.
+        self.batcher.flush_blocking();
+        self.cmds_tx.send(Command::PopPendingTables).ok();
+        self.cmds_tx.send(Command::Shutdown).ok();
+        if let Some(handle) = self.batcher_to_sink_handle.take() {
+            handle.join().ok();
+        }
+    }
+}
+
+impl RecordingStreamInner {
+    fn new(
+        info: RecordingInfo,
+        batcher_config: DataTableBatcherConfig,
+        sink: Box<dyn LogSink>,
+    ) -> RecordingStreamResult<Self> {
+        let batcher = DataTableBatcher::new(batcher_config)?;
+
+        // TODO(cmc): BeginRecordingMsg is a misnomer; it's idempotent.
+        {
+            re_log::debug!(
+                app_id = %info.application_id,
+                rec_id = %info.recording_id,
+                "setting recording info",
+            );
+            sink.send(
+                re_log_types::BeginRecordingMsg {
+                    row_id: re_log_types::RowId::random(),
+                    info: info.clone(),
+                }
+                .into(),
+            );
+        }
+
+        let (cmds_tx, cmds_rx) = crossbeam::channel::unbounded();
+
+        let batcher_to_sink_handle = {
+            const NAME: &str = "RecordingStream::batcher_to_sink";
+            std::thread::Builder::new()
+                .name(NAME.into())
+                .spawn({
+                    let info = info.clone();
+                    let batcher = batcher.clone();
+                    move || forwarding_thread(info, sink, cmds_rx, batcher.tables())
+                })
+                .map_err(|err| RecordingStreamError::SpawnThread {
+                    name: NAME,
+                    err: Box::new(err),
+                })?
+        };
+
+        Ok(RecordingStreamInner {
+            info,
+            cmds_tx,
+            batcher,
+            batcher_to_sink_handle: Some(batcher_to_sink_handle),
+        })
+    }
+}
+
+enum Command {
+    RecordMsg(LogMsg),
+    SwapSink(Box<dyn LogSink>),
+    Flush(Sender<()>),
+    PopPendingTables,
+    Shutdown,
+}
+
+impl Command {
+    fn flush() -> (Self, Receiver<()>) {
+        let (tx, rx) = crossbeam::channel::bounded(0); // oneshot
+        (Self::Flush(tx), rx)
+    }
+}
+
+impl RecordingStream {
+    /// Creates a new [`RecordingStream`] with a given [`RecordingInfo`] and [`LogSink`].
+    ///
+    /// You can create a [`RecordingInfo`] with [`crate::new_recording_info`].
+    ///
+    /// The [`RecordingInfo`] is immediately sent to the sink in the form of a
+    /// [`re_log_types::BeginRecordingMsg`].
+    ///
+    /// You can find sinks in [`crate::sink`].
+    ///
+    /// See also: [`RecordingStreamBuilder`].
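+    ///
+    /// A rough sketch of manual construction (normally you would go through the
+    /// [`RecordingStreamBuilder`] instead):
+    ///
+    /// ```no_run
+    /// let (_enabled, info, batcher_config) =
+    ///     re_sdk::RecordingStreamBuilder::new("my_app").finalize();
+    /// let rec_stream = re_sdk::RecordingStream::new(
+    ///     info,
+    ///     batcher_config,
+    ///     Box::new(re_sdk::sink::BufferedSink::new()),
+    /// )?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```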
+    #[must_use = "Recording will get closed automatically when this object is dropped"]
+    pub fn new(
+        info: RecordingInfo,
+        batcher_config: DataTableBatcherConfig,
+        sink: Box<dyn LogSink>,
+    ) -> RecordingStreamResult<Self> {
+        RecordingStreamInner::new(info, batcher_config, sink).map(|inner| Self {
+            inner: Arc::new(Some(inner)),
+        })
+    }
+
+    /// Creates a new no-op [`RecordingStream`] that drops all logging messages, doesn't allocate
+    /// any memory and doesn't spawn any threads.
+    ///
+    /// [`Self::is_enabled`] will return `false`.
+    pub fn disabled() -> Self {
+        Self {
+            inner: Arc::new(None),
+        }
+    }
+}
+
+#[allow(clippy::needless_pass_by_value)]
+fn forwarding_thread(
+    info: RecordingInfo,
+    mut sink: Box<dyn LogSink>,
+    cmds_rx: Receiver<Command>,
+    tables: Receiver<DataTable>,
+) {
+    fn handle_cmd(info: &RecordingInfo, cmd: Command, sink: &mut Box<dyn LogSink>) -> bool {
+        match cmd {
+            Command::RecordMsg(msg) => {
+                sink.send(msg);
+            }
+            Command::SwapSink(new_sink) => {
+                let backlog = {
+                    // Capture the backlog if it exists.
+                    let backlog = sink.drain_backlog();
+
+                    // Flush the underlying sink if possible.
+                    sink.drop_if_disconnected();
+                    sink.flush_blocking();
+
+                    backlog
+                };
+
+                // Send the recording info to the new sink. This is idempotent.
+                {
+                    re_log::debug!(
+                        app_id = %info.application_id,
+                        rec_id = %info.recording_id,
+                        "setting recording info",
+                    );
+                    new_sink.send(
+                        re_log_types::BeginRecordingMsg {
+                            row_id: re_log_types::RowId::random(),
+                            info: info.clone(),
+                        }
+                        .into(),
+                    );
+                    new_sink.send_all(backlog);
+                }
+
+                *sink = new_sink;
+            }
+            Command::Flush(oneshot) => {
+                // Flush the underlying sink if possible.
+                sink.drop_if_disconnected();
+                sink.flush_blocking();
+                drop(oneshot); // signals the oneshot
+            }
+            Command::PopPendingTables => {
+                // No need to do anything, just skip the current iteration.
+            }
+            Command::Shutdown => return false,
+        }
+
+        true
+    }
+
+    use crossbeam::select;
+    loop {
+        // NOTE: Always pop tables first, this is what makes `Command::PopPendingTables` possible,
+        // which in turn makes `RecordingStream::flush_blocking` well defined.
+        while let Ok(table) = tables.try_recv() {
+            let table = match table.to_arrow_msg() {
+                Ok(table) => table,
+                Err(err) => {
+                    re_log::error!(%err,
+                        "couldn't serialize table; data dropped (this is a bug in Rerun!)");
+                    continue;
+                }
+            };
+            sink.send(LogMsg::ArrowMsg(info.recording_id, table));
+        }
+
+        select! {
+            recv(tables) -> res => {
+                let Ok(table) = res else {
+                    // The batcher is gone, which can only happen if the `RecordingStream` itself
+                    // has been dropped.
+                    break;
+                };
+                let table = match table.to_arrow_msg() {
+                    Ok(table) => table,
+                    Err(err) => {
+                        re_log::error!(%err,
+                            "couldn't serialize table; data dropped (this is a bug in Rerun!)");
+                        continue;
+                    }
+                };
+                sink.send(LogMsg::ArrowMsg(info.recording_id, table));
+            }
+            recv(cmds_rx) -> res => {
+                let Ok(cmd) = res else {
+                    // All command senders are gone, which can only happen if the
+                    // `RecordingStream` itself has been dropped.
+                    break;
+                };
+                if !handle_cmd(&info, cmd, &mut sink) {
+                    break; // shutdown
+                }
+            }
+        }
+
+        // NOTE: The receiving end of the command stream is owned solely by this thread.
+        // Past this point, all command writes will return `ErrDisconnected`.
+    }
+}
+
+impl RecordingStream {
+    /// Check if logging is enabled on this `RecordingStream`.
+    ///
+    /// If not, all recording calls will be ignored.
+    #[inline]
+    pub fn is_enabled(&self) -> bool {
+        self.inner.is_some()
+    }
+
+    /// The [`RecordingInfo`] associated with this `RecordingStream`.
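+    ///
+    /// Returns `None` if the stream is disabled (see [`Self::disabled`]).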
+    #[inline]
+    pub fn recording_info(&self) -> Option<&RecordingInfo> {
+        (*self.inner).as_ref().map(|inner| &inner.info)
+    }
+}
+
+impl RecordingStream {
+    /// Records an arbitrary [`LogMsg`].
+    #[inline]
+    pub fn record_msg(&self, msg: LogMsg) {
+        let Some(this) = &*self.inner else {
+            re_log::debug!("Recording disabled - call to record_msg() ignored");
+            return;
+        };
+
+        // NOTE: Internal channels can never be closed outside of the `Drop` impl, this send cannot
+        // fail.
+
+        this.cmds_tx.send(Command::RecordMsg(msg)).ok();
+    }
+
+    /// Records a [`re_log_types::PathOp`].
+    ///
+    /// This is a convenience wrapper for [`Self::record_msg`].
+    #[inline]
+    pub fn record_path_op(
+        &self,
+        timepoint: re_log_types::TimePoint,
+        path_op: re_log_types::PathOp,
+    ) {
+        let Some(this) = &*self.inner else {
+            re_log::debug!("Recording disabled - call to record_path_op() ignored");
+            return;
+        };
+
+        self.record_msg(LogMsg::EntityPathOpMsg(
+            this.info.recording_id,
+            re_log_types::EntityPathOpMsg {
+                row_id: re_log_types::RowId::random(),
+                time_point: timepoint,
+                path_op,
+            },
+        ));
+    }
+
+    /// Records a single [`DataRow`].
+    ///
+    /// Internally, incoming [`DataRow`]s are automatically coalesced into larger [`DataTable`]s to
+    /// optimize for transport.
+    #[inline]
+    pub fn record_row(&self, row: DataRow) {
+        let Some(this) = &*self.inner else {
+            re_log::debug!("Recording disabled - call to record_row() ignored");
+            return;
+        };
+
+        this.batcher.push_row(row);
+    }
+
+    /// Swaps the underlying sink for a new one.
+    ///
+    /// This guarantees that:
+    /// 1. all pending rows and tables are batched, collected and sent down the current sink,
+    /// 2. the current sink is flushed if it has pending data in its buffers,
+    /// 3. the current sink's backlog, if there's any, is forwarded to the new sink.
+    ///
+    /// When this function returns, the calling thread is guaranteed that all future record calls
+    /// will end up in the new sink.
+    ///
+    /// ## Data loss
+    ///
+    /// If the current sink is in a broken state (e.g. a TCP sink with a broken connection that
+    /// cannot be repaired), all pending data in its buffers will be dropped.
+    pub fn set_sink(&self, sink: Box<dyn LogSink>) {
+        let Some(this) = &*self.inner else {
+            re_log::debug!("Recording disabled - call to set_sink() ignored");
+            return;
+        };
+
+        // NOTE: Internal channels can never be closed outside of the `Drop` impl, all these sends
+        // are safe.
+
+        // 1. Flush the batcher down the table channel
+        this.batcher.flush_blocking();
+
+        // 2. Receive pending tables from the batcher's channel
+        this.cmds_tx.send(Command::PopPendingTables).ok();
+
+        // 3. Swap the sink, which will internally make sure to re-ingest the backlog if needed
+        this.cmds_tx.send(Command::SwapSink(sink)).ok();
+
+        // 4. Before we give control back to the caller, we need to make sure that the swap has
+        //    taken place: we don't want the user to send data to the old sink!
+        let (cmd, oneshot) = Command::flush();
+        this.cmds_tx.send(cmd).ok();
+        oneshot.recv().ok();
+    }
+
+    /// Initiates a flush of the pipeline and returns immediately.
+    ///
+    /// This does **not** wait for the flush to propagate (see [`Self::flush_blocking`]).
+    /// See [`RecordingStream`] docs for ordering semantics and multithreading guarantees.
+    pub fn flush_async(&self) {
+        let Some(this) = &*self.inner else {
+            re_log::debug!("Recording disabled - call to flush_async() ignored");
+            return;
+        };
+
+        // NOTE: Internal channels can never be closed outside of the `Drop` impl, all these sends
+        // are safe.
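+        //
+        // The forwarding thread always pops pending tables before handling its next command
+        // (see `forwarding_thread`), which is what makes the sequence below well defined.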
+
+        // 1. Flush the batcher down the table channel
+        this.batcher.flush_blocking();
+
+        // 2. Receive pending tables from the batcher's channel
+        this.cmds_tx.send(Command::PopPendingTables).ok();
+
+        // 3. Ask the forwarding thread to flush, but don't wait for the acknowledgment:
+        //    dropping the oneshot receiver right away is what makes this call non-blocking.
+        let (cmd, _) = Command::flush();
+        this.cmds_tx.send(cmd).ok();
+    }
+
+    /// Initiates a flush of the batching pipeline and waits for it to propagate.
+    ///
+    /// See [`RecordingStream`] docs for ordering semantics and multithreading guarantees.
+    pub fn flush_blocking(&self) {
+        let Some(this) = &*self.inner else {
+            re_log::debug!("Recording disabled - call to flush_blocking() ignored");
+            return;
+        };
+
+        // NOTE: Internal channels can never be closed outside of the `Drop` impl, all these sends
+        // are safe.
+
+        // 1. Flush the batcher down the table channel
+        this.batcher.flush_blocking();
+
+        // 2. Receive pending tables from the batcher's channel
+        this.cmds_tx.send(Command::PopPendingTables).ok();
+
+        // 3. Wait for all tables to have been forwarded
+        let (cmd, oneshot) = Command::flush();
+        this.cmds_tx.send(cmd).ok();
+        oneshot.recv().ok();
+    }
+}
+
+impl RecordingStream {
+    /// Swaps the underlying sink for a [`crate::log_sink::TcpSink`] sink pre-configured to use
+    /// the specified address.
+    ///
+    /// This is a convenience wrapper for [`Self::set_sink`] that upholds the same guarantees in
+    /// terms of data durability and ordering.
+    /// See [`Self::set_sink`] for more information.
+    pub fn connect(&self, addr: std::net::SocketAddr) {
+        self.set_sink(Box::new(crate::log_sink::TcpSink::new(addr)));
+    }
+
+    /// Swaps the underlying sink for a [`crate::sink::MemorySink`] sink and returns the associated
+    /// [`MemorySinkStorage`].
+    ///
+    /// This is a convenience wrapper for [`Self::set_sink`] that upholds the same guarantees in
+    /// terms of data durability and ordering.
+    /// See [`Self::set_sink`] for more information.
+    pub fn memory(&self) -> MemorySinkStorage {
+        let sink = crate::sink::MemorySink::default();
+        let buffer = sink.buffer();
+        self.set_sink(Box::new(sink));
+        buffer
+    }
+
+    /// Swaps the underlying sink for a [`crate::sink::FileSink`] at the specified `path`.
+    ///
+    /// This is a convenience wrapper for [`Self::set_sink`] that upholds the same guarantees in
+    /// terms of data durability and ordering.
+    /// See [`Self::set_sink`] for more information.
+    pub fn save(
+        &self,
+        path: impl Into<std::path::PathBuf>,
+    ) -> Result<(), crate::sink::FileSinkError> {
+        let sink = crate::sink::FileSink::new(path)?;
+        self.set_sink(Box::new(sink));
+        Ok(())
+    }
+
+    /// Swaps the underlying sink for a [`crate::sink::BufferedSink`].
+    ///
+    /// This is a convenience wrapper for [`Self::set_sink`] that upholds the same guarantees in
+    /// terms of data durability and ordering.
+    /// See [`Self::set_sink`] for more information.
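+    ///
+    /// ```no_run
+    /// // Sketch: stop streaming over TCP and buffer any further data in RAM instead.
+    /// # let rec_stream = re_sdk::RecordingStreamBuilder::new("my_app").buffered()?;
+    /// rec_stream.connect(re_sdk::default_server_addr());
+    /// // ... log things ...
+    /// rec_stream.disconnect();
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```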
+    pub fn disconnect(&self) {
+        self.set_sink(Box::new(crate::sink::BufferedSink::new()));
+    }
+}
+
+// TODO: some regression tests and we're all good
+// TODO: an example would probably be nice too
+
+#[test]
+fn recording_stream_impl_send_sync() {
+    fn assert_send_sync<T: Send + Sync>() {}
+    assert_send_sync::<RecordingStream>();
+}
diff --git a/crates/re_sdk/src/session.rs b/crates/re_sdk/src/session.rs
deleted file mode 100644
index b9fbc1d419d3..000000000000
--- a/crates/re_sdk/src/session.rs
+++ /dev/null
@@ -1,320 +0,0 @@
-use std::sync::Arc;
-
-use re_log_types::{ApplicationId, LogMsg, RecordingId, RecordingInfo, RecordingSource, Time};
-
-use crate::sink::LogSink;
-
-// ----------------------------------------------------------------------------
-
-/// Construct a [`Session`].
-///
-/// ``` no_run
-/// # use re_sdk::SessionBuilder;
-/// let session = SessionBuilder::new("my_app").save("my_recording.rrd")?;
-/// # Ok::<(), Box<dyn std::error::Error>>(())
-/// ```
-#[must_use]
-pub struct SessionBuilder {
-    application_id: ApplicationId,
-    is_official_example: bool,
-    enabled: Option<bool>,
-    default_enabled: bool,
-    recording_id: Option<RecordingId>,
-}
-
-impl SessionBuilder {
-    /// Create a new [`SessionBuilder`] with an application id.
-    ///
-    /// The application id is usually the name of your app.
-    ///
-    /// ``` no_run
-    /// # use re_sdk::SessionBuilder;
-    /// let session = SessionBuilder::new("my_app").save("my_recording.rrd")?;
-    /// # Ok::<(), Box<dyn std::error::Error>>(())
-    /// ```
-    #[track_caller] // track_caller so that we can see if we are being called from an official example.
-    pub fn new(application_id: impl Into<ApplicationId>) -> Self {
-        let application_id = application_id.into();
-        let is_official_example = crate::called_from_official_rust_example();
-
-        Self {
-            application_id,
-            is_official_example,
-            enabled: None,
-            default_enabled: true,
-            recording_id: None,
-        }
-    }
-
-    /// Set whether or not Rerun is enabled by default.
-    ///
-    /// If the `RERUN` environment variable is set, it will override this.
-    ///
-    /// Set also: [`Self::enabled`].
-    pub fn default_enabled(mut self, default_enabled: bool) -> Self {
-        self.default_enabled = default_enabled;
-        self
-    }
-
-    /// Set whether or not Rerun is enabled.
-    ///
-    /// Setting this will ignore the `RERUN` environment variable.
-    ///
-    /// Set also: [`Self::default_enabled`].
-    pub fn enabled(mut self, enabled: bool) -> Self {
-        self.enabled = Some(enabled);
-        self
-    }
-
-    /// Set the [`RecordingId`] for this session.
-    ///
-    /// If you're logging from multiple processes and want all the messages
-    /// to end up as the same recording, you must make sure they all set the same
-    /// [`RecordingId`] using this function.
-    ///
-    /// Note that many recordings can share the same [`ApplicationId`], but
-    /// they all have unique [`RecordingId`]s.
-    ///
-    /// The default is to use a random [`RecordingId`].
-    pub fn recording_id(mut self, recording_id: RecordingId) -> Self {
-        self.recording_id = Some(recording_id);
-        self
-    }
-
-    /// Buffer log messages in RAM.
-    ///
-    /// Retrieve them later with [`Session::drain_backlog`].
-    pub fn buffered(self) -> Session {
-        let (rerun_enabled, recording_info) = self.finalize();
-        if rerun_enabled {
-            Session::buffered(recording_info)
-        } else {
-            re_log::debug!("Rerun disabled - call to buffered() ignored");
-            Session::disabled()
-        }
-    }
-
-    /// Send log data to a remote viewer/server.
-    ///
-    /// Usually this is done by running the `rerun` binary (`cargo install rerun`) without arguments,
-    /// and then connecting to it.
-    ///
-    /// Send all currently buffered messages.
- /// If we are already connected, we will re-connect to this new address. - /// - /// This function returns immediately. - /// - /// ## Example: - /// - /// ``` no_run - /// let session = re_sdk::SessionBuilder::new("my_app").connect(re_sdk::default_server_addr()); - /// ``` - pub fn connect(self, addr: std::net::SocketAddr) -> Session { - let (rerun_enabled, recording_info) = self.finalize(); - if rerun_enabled { - Session::new( - recording_info, - Box::new(crate::log_sink::TcpSink::new(addr)), - ) - } else { - re_log::debug!("Rerun disabled - call to connect() ignored"); - Session::disabled() - } - } - - /// Stream all log messages to an `.rrd` file. - /// - /// ``` no_run - /// # use re_sdk::SessionBuilder; - /// let session = SessionBuilder::new("my_app").save("my_recording.rrd")?; - /// # Ok::<(), Box>(()) - /// ``` - #[cfg(not(target_arch = "wasm32"))] - pub fn save( - self, - path: impl Into, - ) -> Result { - let (rerun_enabled, recording_info) = self.finalize(); - if rerun_enabled { - Ok(Session::new( - recording_info, - Box::new(crate::sink::FileSink::new(path)?), - )) - } else { - re_log::debug!("Rerun disabled - call to save() ignored"); - Ok(Session::disabled()) - } - } - - /// Returns whether or not logging is enabled, plus a [`RecordingInfo`]. - /// - /// This can be used to then construct a [`Session`] manually using [`Session::new`]. - pub fn finalize(self) -> (bool, RecordingInfo) { - let Self { - application_id, - is_official_example, - enabled, - default_enabled, - recording_id, - } = self; - - let enabled = enabled.unwrap_or_else(|| crate::decide_logging_enabled(default_enabled)); - let recording_id = recording_id.unwrap_or_else(RecordingId::random); - - let recording_info = RecordingInfo { - application_id, - recording_id, - is_official_example, - started: Time::now(), - recording_source: RecordingSource::RustSdk { - rustc_version: env!("RE_BUILD_RUSTC_VERSION").into(), - llvm_version: env!("RE_BUILD_LLVM_VERSION").into(), - }, - }; - - (enabled, recording_info) - } -} - -// ---------------------------------------------------------------------------- - -/// The main way to do Rerun loggning. -/// -/// You can construct a [`Session`] with [`SessionBuilder`] or [`Session::new`]. -/// -/// Cloning a [`Session`] is cheap (it's a shallow clone). -/// The clone will send its messages to the same sink as the prototype. -/// -/// `Session` also implements `Send` and `Sync`. -#[must_use] -#[derive(Clone)] -pub struct Session { - recording_info: RecordingInfo, - sink: Arc, - // TODO(emilk): add convenience `TimePoint` here so that users can - // do things like `session.set_time_sequence("frame", frame_idx);` -} - -#[test] -fn session_impl_send_sync() { - fn assert_send_sync() {} - assert_send_sync::(); -} - -impl Session { - /// Construct a new session with a given [`RecordingInfo`] and [`LogSink`]. - /// - /// You can create a [`RecordingInfo`] with [`crate::new_recording_info`]; - /// - /// The [`RecordingInfo`] is immediately sent to the sink in the form of a - /// [`re_log_types::BeginRecordingMsg`]. - /// - /// You can find sinks in [`crate::sink`]. - /// - /// See also: [`SessionBuilder`]. 
- pub fn new(recording_info: RecordingInfo, sink: Box) -> Self { - if sink.is_enabled() { - re_log::debug!( - "Beginning new recording with application_id {:?} and recording id {}", - recording_info.application_id.0, - recording_info.recording_id - ); - - sink.send( - re_log_types::BeginRecordingMsg { - row_id: re_log_types::RowId::random(), - info: recording_info.clone(), - } - .into(), - ); - } - - Self { - recording_info, - sink: sink.into(), - } - } - - /// Construct a new session with a disabled "dummy" sink that drops all logging messages. - /// - /// [`Self::is_enabled`] will return `false`. - pub fn disabled() -> Self { - Self { - recording_info: RecordingInfo { - application_id: ApplicationId::unknown(), - recording_id: Default::default(), - is_official_example: crate::called_from_official_rust_example(), - started: Time::now(), - recording_source: RecordingSource::RustSdk { - rustc_version: env!("RE_BUILD_RUSTC_VERSION").into(), - llvm_version: env!("RE_BUILD_LLVM_VERSION").into(), - }, - }, - sink: crate::sink::disabled().into(), - } - } - - /// Buffer log messages in RAM. - /// - /// Retrieve them later with [`Self::drain_backlog`]. - pub fn buffered(recording_info: RecordingInfo) -> Self { - Self::new(recording_info, Box::new(crate::sink::BufferedSink::new())) - } - - /// Check if logging is enabled on this `Session`. - /// - /// If not, all logging calls will be ignored. - pub fn is_enabled(&self) -> bool { - self.sink.is_enabled() - } - - /// Access the underlying log sink to where we send out log messages. - pub fn sink(&self) -> &Arc { - &self.sink - } - - /// Send a [`LogMsg`]. - pub fn send(&self, log_msg: LogMsg) { - self.sink.send(log_msg); - } - - /// Send a [`re_log_types::PathOp`]. - /// - /// This is a convenience wrapper for [`Self::send`]. - pub fn send_path_op( - &self, - time_point: &re_log_types::TimePoint, - path_op: re_log_types::PathOp, - ) { - self.send(LogMsg::EntityPathOpMsg( - self.recording_id(), - re_log_types::EntityPathOpMsg { - row_id: re_log_types::RowId::random(), - time_point: time_point.clone(), - path_op, - }, - )); - } - - /// Drain all buffered [`LogMsg`]es and return them. - pub fn drain_backlog(&self) -> Vec { - self.sink.drain_backlog() - } - - /// The current [`RecordingId`]. - pub fn recording_id(&self) -> RecordingId { - self.recording_info.recording_id - } -} - -impl AsRef for Session { - fn as_ref(&self) -> &dyn LogSink { - self.sink.as_ref() - } -} - -impl std::borrow::Borrow for Session { - fn borrow(&self) -> &dyn LogSink { - self.sink.as_ref() - } -} diff --git a/crates/rerun/src/clap.rs b/crates/rerun/src/clap.rs index fddb1de26aa6..76594ef1cea7 100644 --- a/crates/rerun/src/clap.rs +++ b/crates/rerun/src/clap.rs @@ -2,13 +2,12 @@ use std::{net::SocketAddr, path::PathBuf}; +use re_sdk::RecordingStream; #[cfg(feature = "web_viewer")] use re_web_viewer_server::WebViewerServerPort; #[cfg(feature = "web_viewer")] use re_ws_comms::RerunServerPort; -use crate::Session; - // --- #[derive(Debug, Clone, PartialEq, Eq)] @@ -24,6 +23,9 @@ enum RerunBehavior { Spawn, } +// TODO(cmc): There are definitely ways of making this all nicer now (this, native_viewer and +// web_viewer).. but one thing at a time. + /// This struct implements a `clap::Parser` that defines all the arguments that a typical Rerun /// application might use, and provides helpers to evaluate those arguments and behave /// consequently. 
@@ -67,7 +69,7 @@ pub struct RerunArgs { } impl RerunArgs { - /// Set up Rerun, and run the given code with a [`Session`] object + /// Set up Rerun, and run the given code with a [`RecordingStream`] object /// that can be used to log data. /// /// Logging will be controlled by the `RERUN` environment variable, @@ -77,7 +79,7 @@ impl RerunArgs { &self, application_id: &str, default_enabled: bool, - run: impl FnOnce(Session) + Send + 'static, + run: impl FnOnce(RecordingStream) + Send + 'static, ) -> anyhow::Result<()> { // Ensure we have a running tokio runtime. #[allow(unused_assignments)] @@ -91,12 +93,13 @@ impl RerunArgs { }; let _tokio_runtime_guard = tokio_runtime_handle.enter(); - let (rerun_enabled, recording_info) = crate::SessionBuilder::new(application_id) - .default_enabled(default_enabled) - .finalize(); + let (rerun_enabled, recording_info, batcher_config) = + crate::RecordingStreamBuilder::new(application_id) + .default_enabled(default_enabled) + .finalize(); if !rerun_enabled { - run(Session::disabled()); + run(RecordingStream::disabled()); return Ok(()); } @@ -117,14 +120,15 @@ impl RerunArgs { #[cfg(feature = "native_viewer")] RerunBehavior::Spawn => { - crate::native_viewer::spawn(recording_info, run)?; + crate::native_viewer::spawn(recording_info, batcher_config, run)?; return Ok(()); } }; - let session = Session::new(recording_info, sink); - let _sink = session.sink().clone(); // Keep sink (and potential associated servers) alive until the end of this function scope. - run(session); + let rec_stream = RecordingStream::new(recording_info, batcher_config, sink)?; + run(rec_stream.clone()); + + rec_stream.flush_async(); #[cfg(feature = "web_viewer")] if matches!(self.to_behavior(), Ok(RerunBehavior::Serve)) { diff --git a/crates/rerun/src/lib.rs b/crates/rerun/src/lib.rs index ae017c1c4fd9..c649cbebc459 100644 --- a/crates/rerun/src/lib.rs +++ b/crates/rerun/src/lib.rs @@ -31,7 +31,7 @@ //! # fn capture_image() -> image::DynamicImage { Default::default() } //! # fn positions() -> Vec { Default::default() } //! # fn colors() -> Vec { Default::default() } -//! let mut rr_session = rerun::SessionBuilder::new("my_app").buffered(); +//! let rec_stream = rerun::RecordingStreamBuilder::new("my_app").buffered()?; //! //! let points: Vec = positions(); //! let colors: Vec = colors(); @@ -40,16 +40,16 @@ //! rerun::MsgSender::new("points") //! .with_component(&points)? //! .with_component(&colors)? -//! .send(&mut rr_session)?; +//! .send(&rec_stream)?; //! //! rerun::MsgSender::new("image") //! .with_component(&[rerun::components::Tensor::from_image(image)?])? -//! .send(&mut rr_session)?; +//! .send(&rec_stream)?; //! //! # Ok::<(), Box>(()) //! ``` //! -//! See [`Session`] and [`MsgSender`] for details. +//! See [`RecordingStream`] and [`MsgSender`] for details. //! //! #### Streaming //! To stream log data to an awaiting `rerun` process, you can do this: @@ -57,18 +57,20 @@ //! //! Then do this: //! -//! ``` no_run -//! let mut rr_session = rerun::SessionBuilder::new("my_app").connect(rerun::default_server_addr()); +//! ```no_run +//! let rec_stream = rerun::RecordingStreamBuilder::new("my_app").connect(rerun::default_server_addr()); //! ``` //! //! #### Buffering //! -//! ``` no_run -//! # fn log_using(rr_session: &rerun::Session) {} +//! ```no_run +//! # fn log_using(rec_stream: &rerun::RecordingStream) {} //! -//! let mut rr_session = rerun::SessionBuilder::new("my_app").buffered(); -//! log_using(&mut rr_session); -//! rerun::native_viewer::show(&mut rr_session); +//! 
let (rec_stream, storage) = rerun::RecordingStreamBuilder::new("my_app").memory()?; +//! log_using(&rec_stream); +//! rerun::native_viewer::show(storage.take()); +//! +//! # Ok::<(), Box>(()) //! ``` //! //! ## Binary diff --git a/crates/rerun/src/native_viewer.rs b/crates/rerun/src/native_viewer.rs index e1d5fc1de4e0..b36ba9d1dc1a 100644 --- a/crates/rerun/src/native_viewer.rs +++ b/crates/rerun/src/native_viewer.rs @@ -1,9 +1,9 @@ use re_log_types::LogMsg; use re_log_types::RecordingInfo; -use re_sdk::Session; +use re_sdk::RecordingStream; /// Starts a Rerun viewer on the current thread and migrates the given callback, along with -/// the active `Session`, to a newly spawned thread where the callback will run until +/// the active `RecordingStream`, to a newly spawned thread where the callback will run until /// completion. /// /// All messages logged from the passed-in callback will be streamed to the viewer in @@ -14,21 +14,26 @@ use re_sdk::Session; /// ⚠️ This function must be called from the main thread since some platforms require that /// their UI runs on the main thread! ⚠️ #[cfg(not(target_arch = "wasm32"))] -pub fn spawn(recording_info: RecordingInfo, run: F) -> re_viewer::external::eframe::Result<()> +pub fn spawn( + recording_info: RecordingInfo, + batcher_config: re_log_types::DataTableBatcherConfig, + run: F, +) -> re_viewer::external::eframe::Result<()> where - F: FnOnce(Session) + Send + 'static, + F: FnOnce(RecordingStream) + Send + 'static, { let (tx, rx) = re_smart_channel::smart_channel(re_smart_channel::Source::Sdk); let sink = Box::new(NativeViewerSink(tx)); let app_env = re_viewer::AppEnvironment::from_recording_source(&recording_info.recording_source); - let session = Session::new(recording_info, sink); + let rec_stream = + RecordingStream::new(recording_info, batcher_config, sink).expect("Failed to spawn thread"); // NOTE: Forget the handle on purpose, leave that thread be. std::thread::Builder::new() .name("spawned".into()) - .spawn(move || run(session)) + .spawn(move || run(rec_stream)) .expect("Failed to spawn thread"); // NOTE: Some platforms still mandate that the UI must run on the main thread, so make sure @@ -49,32 +54,28 @@ where })) } -/// Drains all pending log messages and starts a Rerun viewer to visualize -/// everything that has been logged so far. -/// +/// Starts a Rerun viewer to visualize the contents of a given array of messages. /// The method will return when the viewer is closed. /// -/// You should use this method together with [`Session::buffered`]; -/// /// ⚠️ This function must be called from the main thread since some platforms require that /// their UI runs on the main thread! 
⚠️ -pub fn show(session: &Session) -> re_viewer::external::eframe::Result<()> { - if !session.is_enabled() { - re_log::debug!("Rerun disabled - call to show() ignored"); +pub fn show(msgs: Vec) -> re_viewer::external::eframe::Result<()> { + if msgs.is_empty() { + re_log::debug!("Empty array of msgs - call to show() ignored"); return Ok(()); } + let recording_source = re_log_types::RecordingSource::RustSdk { rustc_version: env!("RE_BUILD_RUSTC_VERSION").into(), llvm_version: env!("RE_BUILD_LLVM_VERSION").into(), }; - let log_messages = session.drain_backlog(); let startup_options = re_viewer::StartupOptions::default(); re_viewer::run_native_viewer_with_messages( re_build_info::build_info!(), re_viewer::AppEnvironment::from_recording_source(&recording_source), startup_options, - log_messages, + msgs, ) } diff --git a/examples/rust/api_demo/src/main.rs b/examples/rust/api_demo/src/main.rs index abc6ff2e873e..32b9cd141beb 100644 --- a/examples/rust/api_demo/src/main.rs +++ b/examples/rust/api_demo/src/main.rs @@ -29,7 +29,7 @@ use rerun::{ re_log_types::external::{arrow2, arrow2_convert}, }, time::{Time, TimePoint, TimeType, Timeline}, - Component, ComponentName, EntityPath, MsgSender, Session, + Component, ComponentName, EntityPath, MsgSender, RecordingStream, }; // --- Rerun logging --- @@ -40,7 +40,7 @@ fn sim_time(at: f64) -> TimePoint { [(timeline_sim_time, time.into())].into() } -fn demo_bbox(session: &Session) -> anyhow::Result<()> { +fn demo_bbox(rec_stream: &RecordingStream) -> anyhow::Result<()> { MsgSender::new("bbox_demo/bbox") .with_timepoint(sim_time(0 as _)) .with_component(&[Box3D::new(1.0, 0.5, 0.25)])? @@ -51,7 +51,7 @@ fn demo_bbox(session: &Session) -> anyhow::Result<()> { .with_component(&[ColorRGBA::from_rgb(0, 255, 0)])? .with_component(&[Radius(0.005)])? .with_component(&[Label("box/t0".to_owned())])? - .send(session)?; + .send(rec_stream)?; MsgSender::new("bbox_demo/bbox") .with_timepoint(sim_time(1 as _)) @@ -63,17 +63,17 @@ fn demo_bbox(session: &Session) -> anyhow::Result<()> { .with_component(&[ColorRGBA::from_rgb(255, 255, 0)])? .with_component(&[Radius(0.01)])? .with_component(&[Label("box/t1".to_owned())])? - .send(session)?; + .send(rec_stream)?; Ok(()) } -fn demo_extension_components(session: &Session) -> anyhow::Result<()> { +fn demo_extension_components(rec_stream: &RecordingStream) -> anyhow::Result<()> { // Hack to establish 2d view bounds MsgSender::new("extension_components") .with_timepoint(sim_time(0 as _)) .with_component(&[Rect2D::from_xywh(0.0, 0.0, 128.0, 128.0)])? - .send(session)?; + .send(rec_stream)?; // Separate extension component // TODO(cmc): not that great to have to dig around for arrow2-* reexports :/ @@ -95,7 +95,7 @@ fn demo_extension_components(session: &Session) -> anyhow::Result<()> { .with_component(&[Point2D::new(64.0, 64.0)])? .with_component(&[ColorRGBA::from_rgb(255, 0, 0)])? .with_component(&[Confidence(0.9)])? - .send(session)?; + .send(rec_stream)?; // Batch points with extension @@ -136,15 +136,15 @@ fn demo_extension_components(session: &Session) -> anyhow::Result<()> { Corner("lower right".into()), ])? .with_splat(Training(true))? 
- .send(session)?; + .send(rec_stream)?; Ok(()) } -fn demo_log_cleared(session: &Session) -> anyhow::Result<()> { +fn demo_log_cleared(rec_stream: &RecordingStream) -> anyhow::Result<()> { // TODO(cmc): need abstractions for this fn log_cleared( - session: &Session, + rec_stream: &RecordingStream, timepoint: &TimePoint, ent_path: impl Into, recursive: bool, @@ -155,7 +155,7 @@ fn demo_log_cleared(session: &Session) -> anyhow::Result<()> { (Timeline::log_time(), Time::now().into()), (*tp[0].0, *tp[0].1), ]; - session.send_path_op(&timepoint.into(), PathOp::clear(recursive, ent_path.into())); + rec_stream.record_path_op(timepoint.into(), PathOp::clear(recursive, ent_path.into())); } // sim_time = 1 @@ -164,46 +164,46 @@ fn demo_log_cleared(session: &Session) -> anyhow::Result<()> { .with_component(&[Rect2D::from_xywh(5.0, 5.0, 4.0, 4.0)])? .with_component(&[ColorRGBA::from_rgb(255, 0, 0)])? .with_component(&[Label("Rect1".into())])? - .send(session)?; + .send(rec_stream)?; MsgSender::new("null_demo/rect/1") .with_timepoint(sim_time(1 as _)) .with_component(&[Rect2D::from_xywh(10.0, 5.0, 4.0, 4.0)])? .with_component(&[ColorRGBA::from_rgb(0, 255, 0)])? .with_component(&[Label("Rect2".into())])? - .send(session)?; + .send(rec_stream)?; // sim_time = 2 - log_cleared(session, &sim_time(2 as _), "null_demo/rect/0", false); + log_cleared(rec_stream, &sim_time(2 as _), "null_demo/rect/0", false); // sim_time = 3 - log_cleared(session, &sim_time(3 as _), "null_demo/rect", true); + log_cleared(rec_stream, &sim_time(3 as _), "null_demo/rect", true); // sim_time = 4 MsgSender::new("null_demo/rect/0") .with_timepoint(sim_time(4 as _)) .with_component(&[Rect2D::from_xywh(5.0, 5.0, 4.0, 4.0)])? - .send(session)?; + .send(rec_stream)?; // sim_time = 5 MsgSender::new("null_demo/rect/1") .with_timepoint(sim_time(5 as _)) .with_component(&[Rect2D::from_xywh(10.0, 5.0, 4.0, 4.0)])? - .send(session)?; + .send(rec_stream)?; Ok(()) } -fn demo_3d_points(session: &Session) -> anyhow::Result<()> { +fn demo_3d_points(rec_stream: &RecordingStream) -> anyhow::Result<()> { MsgSender::new("3d_points/single_point_unlabeled") .with_timepoint(sim_time(1 as _)) .with_component(&[Point3D::new(10.0, 0.0, 0.0)])? - .send(session)?; + .send(rec_stream)?; MsgSender::new("3d_points/single_point_labeled") .with_timepoint(sim_time(1 as _)) .with_component(&[Point3D::new(0.0, 0.0, 0.0)])? .with_component(&[Label("labeled point".to_owned())])? - .send(session)?; + .send(rec_stream)?; fn create_points( n: usize, @@ -232,7 +232,7 @@ fn demo_3d_points(session: &Session) -> anyhow::Result<()> { .with_component(&points)? .with_component(&labels)? .with_component(&radii)? - .send(session)?; + .send(rec_stream)?; let (labels, points, _, colors) = create_points(100, |x| x * 5.0, |y| y * 5.0 - 10.0, |z| z * 0.4 - 5.0); @@ -241,12 +241,12 @@ fn demo_3d_points(session: &Session) -> anyhow::Result<()> { .with_component(&points)? .with_component(&labels)? .with_component(&colors)? - .send(session)?; + .send(rec_stream)?; Ok(()) } -fn demo_rects(session: &Session) -> anyhow::Result<()> { +fn demo_rects(rec_stream: &RecordingStream) -> anyhow::Result<()> { use ndarray::prelude::*; use ndarray_rand::{rand_distr::Uniform, RandomExt as _}; @@ -255,7 +255,7 @@ fn demo_rects(session: &Session) -> anyhow::Result<()> { MsgSender::new("rects_demo/img") .with_timepoint(sim_time(1 as _)) .with_component(&[Tensor::try_from(img.as_standard_layout().view())?])? 
- .send(session)?; + .send(rec_stream)?; // 20 random rectangles // TODO(cmc): shouldn't have to collect, need to fix the "must have a ref" thingy @@ -273,28 +273,32 @@ fn demo_rects(session: &Session) -> anyhow::Result<()> { .with_timepoint(sim_time(2 as _)) .with_component(&rects)? .with_component(&colors)? - .send(session)?; + .send(rec_stream)?; // Clear the rectangles by logging an empty set MsgSender::new("rects_demo/rects") .with_timepoint(sim_time(3 as _)) .with_component(&Vec::::new())? - .send(session)?; + .send(rec_stream)?; Ok(()) } -fn demo_segmentation(session: &Session) -> anyhow::Result<()> { +fn demo_segmentation(rec_stream: &RecordingStream) -> anyhow::Result<()> { // TODO(cmc): All of these text logs should really be going through `re_log` and automagically // fed back into rerun via a `tracing` backend. At the _very_ least we should have a helper // available for this. // In either case, this raises the question of tracking time at the SDK level, akin to what the // python SDK does. - fn log_info(session: &Session, timepoint: TimePoint, text: &str) -> anyhow::Result<()> { + fn log_info( + rec_stream: &RecordingStream, + timepoint: TimePoint, + text: &str, + ) -> anyhow::Result<()> { MsgSender::new("logs/seg_demo_log") .with_timepoint(timepoint) .with_component(&[TextEntry::new(text, Some("INFO".into()))])? - .send(session) + .send(rec_stream) .map_err(Into::into) } @@ -310,20 +314,20 @@ fn demo_segmentation(session: &Session) -> anyhow::Result<()> { MsgSender::new("seg_demo/img") .with_timepoint(sim_time(1 as _)) .with_component(&[tensor])? - .send(session)?; + .send(rec_stream)?; // Log a bunch of classified 2D points MsgSender::new("seg_demo/single_point") .with_timepoint(sim_time(1 as _)) .with_component(&[Point2D::new(64.0, 64.0)])? .with_component(&[ClassId(13)])? - .send(session)?; + .send(rec_stream)?; MsgSender::new("seg_demo/single_point_labeled") .with_timepoint(sim_time(1 as _)) .with_component(&[Point2D::new(90.0, 50.0)])? .with_component(&[ClassId(13)])? .with_component(&[Label("labeled point".into())])? - .send(session)?; + .send(rec_stream)?; MsgSender::new("seg_demo/several_points0") .with_timepoint(sim_time(1 as _)) .with_component(&[ @@ -332,7 +336,7 @@ fn demo_segmentation(session: &Session) -> anyhow::Result<()> { Point2D::new(60.0, 30.0), ])? .with_splat(ClassId(42))? - .send(session)?; + .send(rec_stream)?; MsgSender::new("seg_demo/several_points1") .with_timepoint(sim_time(1 as _)) .with_component(&[ @@ -341,7 +345,7 @@ fn demo_segmentation(session: &Session) -> anyhow::Result<()> { Point2D::new(80.0, 30.0), ])? .with_component(&[ClassId(13), ClassId(42), ClassId(99)])? - .send(session)?; + .send(rec_stream)?; MsgSender::new("seg_demo/many points") .with_timepoint(sim_time(1 as _)) .with_component( @@ -350,9 +354,9 @@ fn demo_segmentation(session: &Session) -> anyhow::Result<()> { .collect::>(), )? .with_splat(ClassId(42))? - .send(session)?; + .send(rec_stream)?; log_info( - session, + rec_stream, sim_time(1 as _), "no rects, default colored points, a single point has a label", )?; @@ -388,9 +392,9 @@ fn demo_segmentation(session: &Session) -> anyhow::Result<()> { .into_iter() .collect(), }])? - .send(session)?; + .send(rec_stream)?; log_info( - session, + rec_stream, sim_time(2 as _), "default colored rects, default colored points, all points except the \ bottom right clusters have labels", @@ -408,9 +412,9 @@ fn demo_segmentation(session: &Session) -> anyhow::Result<()> { .into_iter() .collect(), }])? 
- .send(session)?; + .send(rec_stream)?; log_info( - session, + rec_stream, sim_time(3 as _), "points/rects with user specified colors", )?; @@ -427,9 +431,9 @@ fn demo_segmentation(session: &Session) -> anyhow::Result<()> { .into_iter() .collect(), }])? - .send(session)?; + .send(rec_stream)?; log_info( - session, + rec_stream, sim_time(4 as _), "label1 disappears and everything with label3 is now default colored again", )?; @@ -437,7 +441,7 @@ fn demo_segmentation(session: &Session) -> anyhow::Result<()> { Ok(()) } -fn demo_text_logs(session: &Session) -> anyhow::Result<()> { +fn demo_text_logs(rec_stream: &RecordingStream) -> anyhow::Result<()> { // TODO(cmc): the python SDK has some magic that glues the standard logger directly into rerun // logs; we're gonna need something similar for rust (e.g. `tracing` backend). @@ -448,7 +452,7 @@ fn demo_text_logs(session: &Session) -> anyhow::Result<()> { .with_timepoint(sim_time(0 as _)) .with_component(&[TextEntry::new("Text with explicitly set color", None)])? .with_component(&[ColorRGBA::from_rgb(255, 215, 0)])? - .send(session)?; + .send(rec_stream)?; MsgSender::new("logs") .with_timepoint(sim_time(0 as _)) @@ -456,12 +460,12 @@ fn demo_text_logs(session: &Session) -> anyhow::Result<()> { "this entry has loglevel TRACE", Some("TRACE".into()), )])? - .send(session)?; + .send(rec_stream)?; Ok(()) } -fn demo_transforms_3d(session: &Session) -> anyhow::Result<()> { +fn demo_transforms_3d(rec_stream: &RecordingStream) -> anyhow::Result<()> { let sun_to_planet_distance = 6.0; let planet_to_moon_distance = 3.0; let rotation_speed_planet = 2.0; @@ -469,7 +473,7 @@ fn demo_transforms_3d(session: &Session) -> anyhow::Result<()> { // Planetary motion is typically in the XY plane. fn log_coordinate_space( - session: &Session, + rec_stream: &RecordingStream, ent_path: impl Into, ) -> anyhow::Result<()> { let view_coords = ViewCoordinates::from_up_and_handedness( @@ -480,17 +484,17 @@ fn demo_transforms_3d(session: &Session) -> anyhow::Result<()> { .with_timeless(true) .with_component(&[view_coords])? .with_component(&[ColorRGBA::from_rgb(255, 215, 0)])? - .send(session) + .send(rec_stream) .map_err(Into::into) } - log_coordinate_space(session, "transforms3d")?; - log_coordinate_space(session, "transforms3d/sun")?; - log_coordinate_space(session, "transforms3d/sun/planet")?; - log_coordinate_space(session, "transforms3d/sun/planet/moon")?; + log_coordinate_space(rec_stream, "transforms3d")?; + log_coordinate_space(rec_stream, "transforms3d/sun")?; + log_coordinate_space(rec_stream, "transforms3d/sun/planet")?; + log_coordinate_space(rec_stream, "transforms3d/sun/planet/moon")?; // All are in the center of their own space: fn log_point( - session: &Session, + rec_stream: &RecordingStream, ent_path: impl Into, radius: f32, color: [u8; 3], @@ -500,13 +504,13 @@ fn demo_transforms_3d(session: &Session) -> anyhow::Result<()> { .with_component(&[Point3D::ZERO])? .with_component(&[Radius(radius)])? .with_component(&[ColorRGBA::from_rgb(color[0], color[1], color[2])])? 
- .send(session) + .send(rec_stream) .map_err(Into::into) } - log_point(session, "transforms3d/sun", 1.0, [255, 200, 10])?; - log_point(session, "transforms3d/sun/planet", 0.4, [40, 80, 200])?; + log_point(rec_stream, "transforms3d/sun", 1.0, [255, 200, 10])?; + log_point(rec_stream, "transforms3d/sun/planet", 0.4, [40, 80, 200])?; log_point( - session, + rec_stream, "transforms3d/sun/planet/moon", 0.15, [180, 180, 180], @@ -533,7 +537,7 @@ fn demo_transforms_3d(session: &Session) -> anyhow::Result<()> { .with_component(&points)? .with_splat(Radius(0.025))? .with_splat(ColorRGBA::from_rgb(80, 80, 80))? - .send(session)?; + .send(rec_stream)?; // paths where the planet & moon move let create_path = |distance: f32| { @@ -549,11 +553,11 @@ fn demo_transforms_3d(session: &Session) -> anyhow::Result<()> { MsgSender::new("transforms3d/sun/planet_path") .with_timepoint(sim_time(0 as _)) .with_component(&[create_path(sun_to_planet_distance)])? - .send(session)?; + .send(rec_stream)?; MsgSender::new("transforms3d/sun/planet/moon_path") .with_timepoint(sim_time(0 as _)) .with_component(&[create_path(planet_to_moon_distance)])? - .send(session)?; + .send(rec_stream)?; for i in 0..6 * 120 { let time = i as f32 / 120.0; @@ -571,7 +575,7 @@ fn demo_transforms_3d(session: &Session) -> anyhow::Result<()> { 0.0, ), })])? - .send(session)?; + .send(rec_stream)?; MsgSender::new("transforms3d/sun/planet/moon") .with_timepoint(sim_time(time as _)) @@ -583,7 +587,7 @@ fn demo_transforms_3d(session: &Session) -> anyhow::Result<()> { 0.0, ), })])? - .send(session)?; + .send(rec_stream)?; } Ok(()) @@ -629,7 +633,7 @@ struct Args { demo: Option>, } -fn run(session: &Session, args: &Args) -> anyhow::Result<()> { +fn run(rec_stream: &RecordingStream, args: &Args) -> anyhow::Result<()> { use clap::ValueEnum as _; let demos: HashSet = args.demo.as_ref().map_or_else( || Demo::value_variants().iter().copied().collect(), @@ -638,14 +642,14 @@ fn run(session: &Session, args: &Args) -> anyhow::Result<()> { for demo in demos { match demo { - Demo::BoundingBox => demo_bbox(session)?, - Demo::ExtensionComponents => demo_extension_components(session)?, - Demo::LogCleared => demo_log_cleared(session)?, - Demo::Points3D => demo_3d_points(session)?, - Demo::Rects => demo_rects(session)?, - Demo::Segmentation => demo_segmentation(session)?, - Demo::TextLogs => demo_text_logs(session)?, - Demo::Transforms3D => demo_transforms_3d(session)?, + Demo::BoundingBox => demo_bbox(rec_stream)?, + Demo::ExtensionComponents => demo_extension_components(rec_stream)?, + Demo::LogCleared => demo_log_cleared(rec_stream)?, + Demo::Points3D => demo_3d_points(rec_stream)?, + Demo::Rects => demo_rects(rec_stream)?, + Demo::Segmentation => demo_segmentation(rec_stream)?, + Demo::TextLogs => demo_text_logs(rec_stream)?, + Demo::Transforms3D => demo_transforms_3d(rec_stream)?, } } @@ -661,7 +665,7 @@ fn main() -> anyhow::Result<()> { let default_enabled = true; args.rerun .clone() - .run("api_demo_rs", default_enabled, move |session| { - run(&session, &args).unwrap(); + .run("api_demo_rs", default_enabled, move |rec_stream| { + run(&rec_stream, &args).unwrap(); }) } diff --git a/examples/rust/dna/src/main.rs b/examples/rust/dna/src/main.rs index 488808886119..ecc97445c41c 100644 --- a/examples/rust/dna/src/main.rs +++ b/examples/rust/dna/src/main.rs @@ -9,20 +9,20 @@ use rerun::{ demo_util::{bounce_lerp, color_spiral}, external::glam, time::{Time, TimeType, Timeline}, - MsgSender, MsgSenderError, Session, + MsgSender, MsgSenderError, RecordingStream, }; 
const NUM_POINTS: usize = 100; fn main() -> Result<(), Box> { let recording_info = rerun::new_recording_info("DNA Abacus"); - rerun::native_viewer::spawn(recording_info, |session| { - run(&session).unwrap(); + rerun::native_viewer::spawn(recording_info, Default::default(), |rec_stream| { + run(&rec_stream).unwrap(); })?; Ok(()) } -fn run(session: &Session) -> Result<(), MsgSenderError> { +fn run(rec_stream: &RecordingStream) -> Result<(), MsgSenderError> { let stable_time = Timeline::new("stable_time", TimeType::Time); let (points1, colors1) = color_spiral(NUM_POINTS, 2.0, 0.02, 0.0, 0.1); @@ -33,14 +33,14 @@ fn run(session: &Session) -> Result<(), MsgSenderError> { .with_component(&points1.iter().copied().map(Point3D::from).collect_vec())? .with_component(&colors1.iter().copied().map(ColorRGBA::from).collect_vec())? .with_splat(Radius(0.08))? - .send(session)?; + .send(rec_stream)?; MsgSender::new("dna/structure/right") .with_time(stable_time, 0) .with_component(&points2.iter().copied().map(Point3D::from).collect_vec())? .with_component(&colors2.iter().copied().map(ColorRGBA::from).collect_vec())? .with_splat(Radius(0.08))? - .send(session)?; + .send(rec_stream)?; let scaffolding = points1 .iter() @@ -55,7 +55,7 @@ fn run(session: &Session) -> Result<(), MsgSenderError> { .with_time(stable_time, 0) .with_component(&scaffolding)? .with_splat(ColorRGBA::from([128, 128, 128, 255]))? - .send(session)?; + .send(rec_stream)?; use rand::Rng as _; let mut rng = rand::thread_rng(); @@ -86,7 +86,7 @@ fn run(session: &Session) -> Result<(), MsgSenderError> { .with_component(&beads)? .with_component(&colors)? .with_splat(Radius(0.06))? - .send(session)?; + .send(rec_stream)?; MsgSender::new("dna/structure") .with_time(stable_time, Time::from_seconds_since_epoch(time as _)) @@ -97,7 +97,7 @@ fn run(session: &Session) -> Result<(), MsgSenderError> { )), ..Default::default() })])? - .send(session)?; + .send(rec_stream)?; } Ok(()) diff --git a/examples/rust/minimal/src/main.rs b/examples/rust/minimal/src/main.rs index 6fdce0ef1a48..6909c6d2b486 100644 --- a/examples/rust/minimal/src/main.rs +++ b/examples/rust/minimal/src/main.rs @@ -4,11 +4,11 @@ use rerun::{ components::{ColorRGBA, Point3D}, demo_util::grid, external::glam, - MsgSender, SessionBuilder, + MsgSender, RecordingStreamBuilder, }; fn main() -> Result<(), Box> { - let session = SessionBuilder::new("minimal_rs").buffered(); + let (rec_stream, storage) = RecordingStreamBuilder::new("minimal_rs").memory()?; let points = grid(glam::Vec3::splat(-5.0), glam::Vec3::splat(5.0), 10) .map(Point3D::from) @@ -20,9 +20,11 @@ fn main() -> Result<(), Box> { MsgSender::new("my_points") .with_component(&points)? .with_component(&colors)? 
- .send(&session)?; + .send(&rec_stream)?; - rerun::native_viewer::show(&session)?; + rec_stream.flush_blocking(); + + rerun::native_viewer::show(storage.take())?; Ok(()) } diff --git a/examples/rust/minimal_options/src/main.rs b/examples/rust/minimal_options/src/main.rs index c6045998e498..726ef1650ee3 100644 --- a/examples/rust/minimal_options/src/main.rs +++ b/examples/rust/minimal_options/src/main.rs @@ -7,7 +7,7 @@ use rerun::components::{ColorRGBA, Point3D}; use rerun::time::{TimeType, Timeline}; -use rerun::{external::re_log, MsgSender, Session}; +use rerun::{external::re_log, MsgSender, RecordingStream}; use rerun::demo_util::grid; @@ -24,7 +24,7 @@ struct Args { radius: f32, } -fn run(session: &Session, args: &Args) -> anyhow::Result<()> { +fn run(rec_stream: &RecordingStream, args: &Args) -> anyhow::Result<()> { let timeline_keyframe = Timeline::new("keyframe", TimeType::Sequence); let points = grid( @@ -46,7 +46,7 @@ fn run(session: &Session, args: &Args) -> anyhow::Result<()> { .with_component(&points)? .with_component(&colors)? .with_time(timeline_keyframe, 0) - .send(session)?; + .send(rec_stream)?; Ok(()) } @@ -60,7 +60,7 @@ fn main() -> anyhow::Result<()> { let default_enabled = true; args.rerun .clone() - .run("minimal_options", default_enabled, move |session| { - run(&session, &args).unwrap(); + .run("minimal_options", default_enabled, move |rec_stream| { + run(&rec_stream, &args).unwrap(); }) } diff --git a/examples/rust/objectron/src/main.rs b/examples/rust/objectron/src/main.rs index 203b581a0ec4..a2ecf235960e 100644 --- a/examples/rust/objectron/src/main.rs +++ b/examples/rust/objectron/src/main.rs @@ -21,7 +21,7 @@ use anyhow::{anyhow, Context as _}; use rerun::{ external::re_log, time::{Time, TimePoint, TimeType, Timeline}, - MsgSender, Session, + MsgSender, RecordingStream, }; // --- Rerun logging --- @@ -73,7 +73,7 @@ impl<'a> From<&'a [objectron::FrameAnnotation]> for AnnotationsPerFrame<'a> { } fn log_coordinate_space( - session: &Session, + rec_stream: &RecordingStream, ent_path: impl Into, axes: &str, ) -> anyhow::Result<()> { @@ -83,33 +83,36 @@ fn log_coordinate_space( MsgSender::new(ent_path) .with_timeless(true) .with_component(&[view_coords])? 
- .send(session) + .send(rec_stream) .map_err(Into::into) } fn log_ar_frame( - session: &Session, + rec_stream: &RecordingStream, annotations: &AnnotationsPerFrame<'_>, ar_frame: &ArFrame, ) -> anyhow::Result<()> { - log_video_frame(session, ar_frame)?; + log_video_frame(rec_stream, ar_frame)?; if let Some(ar_camera) = ar_frame.data.camera.as_ref() { - log_ar_camera(session, ar_frame.timepoint.clone(), ar_camera)?; + log_ar_camera(rec_stream, ar_frame.timepoint.clone(), ar_camera)?; } if let Some(points) = ar_frame.data.raw_feature_points.as_ref() { - log_feature_points(session, ar_frame.timepoint.clone(), points)?; + log_feature_points(rec_stream, ar_frame.timepoint.clone(), points)?; } if let Some(&annotations) = annotations.0.get(&ar_frame.index) { - log_frame_annotations(session, &ar_frame.timepoint, annotations)?; + log_frame_annotations(rec_stream, &ar_frame.timepoint, annotations)?; } Ok(()) } -fn log_baseline_objects(session: &Session, objects: &[objectron::Object]) -> anyhow::Result<()> { +fn log_baseline_objects( + rec_stream: &RecordingStream, + objects: &[objectron::Object], +) -> anyhow::Result<()> { use rerun::components::{Box3D, ColorRGBA, Label, Rigid3, Transform}; let boxes = objects.iter().filter_map(|object| { @@ -143,26 +146,26 @@ fn log_baseline_objects(session: &Session, objects: &[objectron::Object]) -> any .with_component(&[transform])? .with_component(&[label])? .with_splat(ColorRGBA::from_rgb(160, 230, 130))? - .send(session)?; + .send(rec_stream)?; } Ok(()) } -fn log_video_frame(session: &Session, ar_frame: &ArFrame) -> anyhow::Result<()> { +fn log_video_frame(rec_stream: &RecordingStream, ar_frame: &ArFrame) -> anyhow::Result<()> { let image_path = ar_frame.dir.join(format!("video/{}.jpg", ar_frame.index)); let tensor = rerun::components::Tensor::tensor_from_jpeg_file(image_path)?; MsgSender::new("world/camera/video") .with_timepoint(ar_frame.timepoint.clone()) .with_component(&[tensor])? - .send(session)?; + .send(rec_stream)?; Ok(()) } fn log_ar_camera( - session: &Session, + rec_stream: &RecordingStream, timepoint: TimePoint, ar_camera: &objectron::ArCamera, ) -> anyhow::Result<()> { @@ -196,20 +199,20 @@ fn log_ar_camera( rotation: rot.into(), translation: translation.into(), })])? - .send(session)?; + .send(rec_stream)?; MsgSender::new("world/camera/video") .with_timepoint(timepoint) .with_component(&[Transform::Pinhole(Pinhole { image_from_cam: intrinsics.into(), resolution: Some(resolution.into()), })])? - .send(session)?; + .send(rec_stream)?; Ok(()) } fn log_feature_points( - session: &Session, + rec_stream: &RecordingStream, timepoint: TimePoint, points: &objectron::ArPointCloud, ) -> anyhow::Result<()> { @@ -236,13 +239,13 @@ fn log_feature_points( .with_component(&points)? .with_component(&ids)? .with_splat(ColorRGBA::from_rgb(255, 255, 255))? 
- .send(session)?; + .send(rec_stream)?; Ok(()) } fn log_frame_annotations( - session: &Session, + rec_stream: &RecordingStream, timepoint: &TimePoint, annotations: &objectron::FrameAnnotation, ) -> anyhow::Result<()> { @@ -306,7 +309,7 @@ fn log_frame_annotations( .with_component(&points.into_iter().map(Point2D::from).collect::>())?; } - msg.send(session)?; + msg.send(rec_stream)?; } Ok(()) @@ -360,7 +363,7 @@ fn parse_duration(arg: &str) -> Result anyhow::Result<()> { +fn run(rec_stream: &RecordingStream, args: &Args) -> anyhow::Result<()> { // Parse protobuf dataset let rec_info = args.recording.info().with_context(|| { use clap::ValueEnum as _; @@ -374,10 +377,10 @@ fn run(session: &Session, args: &Args) -> anyhow::Result<()> { let annotations = read_annotations(&rec_info.path_annotations)?; // See https://github.com/google-research-datasets/Objectron/issues/39 - log_coordinate_space(session, "world", "RUB")?; - log_coordinate_space(session, "world/camera", "RDF")?; + log_coordinate_space(rec_stream, "world", "RUB")?; + log_coordinate_space(rec_stream, "world/camera", "RDF")?; - log_baseline_objects(session, &annotations.objects)?; + log_baseline_objects(rec_stream, &annotations.objects)?; let mut global_frame_offset = 0; let mut global_time_offset = 0.0; @@ -404,7 +407,7 @@ fn run(session: &Session, args: &Args) -> anyhow::Result<()> { ar_frame, ); let annotations = annotations.frame_annotations.as_slice().into(); - log_ar_frame(session, &annotations, &ar_frame)?; + log_ar_frame(rec_stream, &annotations, &ar_frame)?; if let Some(d) = args.per_frame_sleep { std::thread::sleep(d); @@ -434,8 +437,8 @@ fn main() -> anyhow::Result<()> { let default_enabled = true; args.rerun .clone() - .run("objectron_rs", default_enabled, move |session| { - run(&session, &args).unwrap(); + .run("objectron_rs", default_enabled, move |rec_stream| { + run(&rec_stream, &args).unwrap(); }) } diff --git a/examples/rust/raw_mesh/src/main.rs b/examples/rust/raw_mesh/src/main.rs index fd1f1e0419fc..692ac1278283 100644 --- a/examples/rust/raw_mesh/src/main.rs +++ b/examples/rust/raw_mesh/src/main.rs @@ -16,7 +16,7 @@ use rerun::components::{ColorRGBA, Mesh3D, MeshId, RawMesh3D, Transform, Vec4D, use rerun::time::{TimeType, Timeline}; use rerun::{ external::{re_log, re_memory::AccountingAllocator}, - EntityPath, MsgSender, Session, + EntityPath, MsgSender, RecordingStream, }; // TODO(cmc): This example needs to support animations to showcase Rerun's time capabilities. @@ -67,7 +67,7 @@ impl From for Transform { } /// Log a glTF node with Rerun. -fn log_node(session: &Session, node: GltfNode) -> anyhow::Result<()> { +fn log_node(rec_stream: &RecordingStream, node: GltfNode) -> anyhow::Result<()> { let ent_path = EntityPath::from(node.name.as_str()); // Convert glTF objects into Rerun components. @@ -83,19 +83,19 @@ fn log_node(session: &Session, node: GltfNode) -> anyhow::Result<()> { .with_time(timeline_keyframe, 0) .with_component(&primitives)? .with_component(transform.as_ref())? - .send(session)?; + .send(rec_stream)?; // Recurse through all of the node's children! for mut child in node.children { child.name = [node.name.as_str(), child.name.as_str()].join("/"); - log_node(session, child)?; + log_node(rec_stream, child)?; } Ok(()) } fn log_coordinate_space( - session: &Session, + rec_stream: &RecordingStream, ent_path: impl Into, axes: &str, ) -> anyhow::Result<()> { @@ -106,7 +106,7 @@ fn log_coordinate_space( MsgSender::new(ent_path) .with_timeless(true) .with_component(&[view_coords])? 
- .send(session) + .send(rec_stream) .map_err(Into::into) } @@ -171,7 +171,7 @@ impl Args { } } -fn run(session: &Session, args: &Args) -> anyhow::Result<()> { +fn run(rec_stream: &RecordingStream, args: &Args) -> anyhow::Result<()> { // Read glTF scene let (doc, buffers, _) = gltf::import_slice(Bytes::from(std::fs::read(args.scene_path()?)?))?; let nodes = load_gltf(&doc, &buffers); @@ -179,8 +179,8 @@ fn run(session: &Session, args: &Args) -> anyhow::Result<()> { // Log raw glTF nodes and their transforms with Rerun for root in nodes { re_log::info!(scene = root.name, "logging glTF scene"); - log_coordinate_space(session, root.name.as_str(), "RUB")?; - log_node(session, root)?; + log_coordinate_space(rec_stream, root.name.as_str(), "RUB")?; + log_node(rec_stream, root)?; } Ok(()) @@ -195,8 +195,8 @@ fn main() -> anyhow::Result<()> { let default_enabled = true; args.rerun .clone() - .run("raw_mesh_rs", default_enabled, move |session| { - run(&session, &args).unwrap(); + .run("raw_mesh_rs", default_enabled, move |rec_stream| { + run(&rec_stream, &args).unwrap(); }) } diff --git a/rerun_py/src/python_session.rs b/rerun_py/src/python_session.rs index af973f69d5dc..fe56aa0e9a7a 100644 --- a/rerun_py/src/python_session.rs +++ b/rerun_py/src/python_session.rs @@ -172,8 +172,8 @@ impl PythonSession { // Before changing the sink, we set drop_if_disconnected and // flush. This ensures that any messages that are currently // buffered will be sent. - self.sink.drop_msgs_if_disconnected(); - self.sink.flush(); + self.sink.drop_if_disconnected(); + self.sink.flush_blocking(); self.sink = sink; if backlog.is_empty() { @@ -243,7 +243,7 @@ impl PythonSession { /// Wait until all logged data have been sent to the remove server (if any). pub fn flush(&mut self) { - self.sink.flush(); + self.sink.flush_blocking(); } /// Send a single [`DataRow`]. diff --git a/tests/rust/test_image_memory/src/main.rs b/tests/rust/test_image_memory/src/main.rs index 4d42358f5a49..ea6868c0597a 100644 --- a/tests/rust/test_image_memory/src/main.rs +++ b/tests/rust/test_image_memory/src/main.rs @@ -14,13 +14,13 @@ fn main() -> Result<(), Box> { ); let recording_info = rerun::new_recording_info("test_image_memory_rs"); - rerun::native_viewer::spawn(recording_info, |session| { - log_images(&session).unwrap(); + rerun::native_viewer::spawn(recording_info, Default::default(), |rec_stream| { + log_images(&rec_stream).unwrap(); })?; Ok(()) } -fn log_images(session: &rerun::Session) -> Result<(), Box> { +fn log_images(rec_stream: &rerun::RecordingStream) -> Result<(), Box> { let (w, h) = (2048, 1024); let n = 100; @@ -36,9 +36,11 @@ fn log_images(session: &rerun::Session) -> Result<(), Box for _ in 0..n { rerun::MsgSender::new("image") .with_component(&[tensor.clone()])? 
-            .send(session)?;
+            .send(rec_stream)?;
     }
 
+    rec_stream.flush_blocking();
+
     eprintln!(
         "Logged {n} {w}x{h} RGBA images = {}",
         re_format::format_bytes((n * w * h * 4) as _)

From b872b633781827a9ea018560f2e26f1c26c71291 Mon Sep 17 00:00:00 2001
From: Clement Rey
Date: Thu, 27 Apr 2023 18:35:19 +0200
Subject: [PATCH 12/31] sunset PythonSession, introduce global data
 RecordingStream

---
 crates/re_sdk_comms/src/buffered_client.rs |   4 +
 examples/python/clock/main.py              |   4 +-
 examples/python/minimal/main.py            |   2 +-
 examples/python/multiprocessing/main.py    |   1 +
 rerun_py/rerun_sdk/rerun/__init__.py       |  45 +-
 rerun_py/src/lib.rs                        |   1 -
 rerun_py/src/python_bridge.rs              | 883 ++++++++++++---------
 rerun_py/src/python_session.rs             | 341 --------
 8 files changed, 534 insertions(+), 747 deletions(-)
 delete mode 100644 rerun_py/src/python_session.rs

diff --git a/crates/re_sdk_comms/src/buffered_client.rs b/crates/re_sdk_comms/src/buffered_client.rs
index 797a4a23b34e..a6063e884ddf 100644
--- a/crates/re_sdk_comms/src/buffered_client.rs
+++ b/crates/re_sdk_comms/src/buffered_client.rs
@@ -202,6 +202,10 @@ fn msg_encode(
             re_log::error!("Failed to send message to tcp_sender thread. Likely a shutdown race-condition.");
             return;
         }
+        // TODO: this is incorrect and dangerous: flush() can return before this
+        // thread is done with its workload, which means the python process might be
+        // dead before this thread is dead, which means we call a C callback that has
+        // already been unloaded.
         if msg_drop_tx.send(msg_msg).is_err() {
             re_log::error!("Failed to send message to msg_drop thread. Likely a shutdown race-condition");
             return;
diff --git a/examples/python/clock/main.py b/examples/python/clock/main.py
index 4501a0ad23db..a62fa123d63b 100755
--- a/examples/python/clock/main.py
+++ b/examples/python/clock/main.py
@@ -59,9 +59,9 @@ def rotate(angle: float, len: float) -> Tuple[float, float, float]:
 
         scaled_h = (t_secs % 43200) / 43200.0
         point_h = np.array(rotate(math.tau * scaled_h, LENGTH_H))
-        color_h = (int(255 - (scaled_h * 255)), int(scaled_h * 255), 255, 255)
+        color_h = (int(255 - (scaled_h * 255)), int(scaled_h * 255), 255, 128)
         rr.log_point("world/hours_pt", position=point_h, color=color_h)
-        rr.log_arrow("world/hours_hand", origin=[0.0, 0.0, 0.0], vector=point_h, color=color_h, width_scale=WIDTH_M)
+        rr.log_arrow("world/hours_hand", origin=[0.0, 0.0, 0.0], vector=point_h, color=color_h, width_scale=WIDTH_H)
 
 
 if __name__ == "__main__":
diff --git a/examples/python/minimal/main.py b/examples/python/minimal/main.py
index 92e45a01b000..3c2d8573a600 100755
--- a/examples/python/minimal/main.py
+++ b/examples/python/minimal/main.py
@@ -9,7 +9,7 @@
 _, unknown = __import__("argparse").ArgumentParser().parse_known_args()
 [__import__("logging").warning(f"unknown arg: {arg}") for arg in unknown]
 
-rr.spawn()
+rr.init("minimal", spawn=True)
 
 positions = np.vstack([xyz.ravel() for xyz in np.mgrid[3 * [slice(-5, 5, 10j)]]]).T
 colors = np.vstack([rgb.ravel() for rgb in np.mgrid[3 * [slice(0, 255, 10j)]]]).astype(np.uint8).T
diff --git a/examples/python/multiprocessing/main.py b/examples/python/multiprocessing/main.py
index 4c849bdbf8b0..4cc319b39405 100755
--- a/examples/python/multiprocessing/main.py
+++ b/examples/python/multiprocessing/main.py
@@ -14,6 +14,7 @@ def task(title: str) -> None:
     # All processes spawned with `multiprocessing` will automatically
     # be assigned the same default recording_id.
     # We just need to connect each process to the rerun viewer:
+    rr.init("multiprocessing")
     rr.connect()
 
     rr.log_text_entry(
diff --git a/rerun_py/rerun_sdk/rerun/__init__.py b/rerun_py/rerun_sdk/rerun/__init__.py
index 799ec2ccfd50..b17b919217e4 100644
--- a/rerun_py/rerun_sdk/rerun/__init__.py
+++ b/rerun_py/rerun_sdk/rerun/__init__.py
@@ -29,8 +29,6 @@
     "ClassDescription",
     "LoggingHandler",
     "bindings",
-    "components",
-    "inline_show",
     "ImageFormat",
     "log_annotation_context",
     "log_arrow",
@@ -58,7 +56,6 @@
     "log_text_entry",
     "log_unknown_transform",
     "log_view_coordinates",
-    "notebook",
     "LogLevel",
     "MeshFormat",
     "RectFormat",
@@ -109,29 +106,13 @@ def get_recording_id() -> str:
     return str(bindings.get_recording_id())
 
 
-def set_recording_id(value: str) -> None:
-    """
-    Set the recording ID that this process is logging to, as a UUIDv4.
-
-    The default recording_id is based on `multiprocessing.current_process().authkey`
-    which means that all processes spawned with `multiprocessing`
-    will have the same default recording_id.
-
-    If you are not using `multiprocessing` and still want several different Python
-    processes to log to the same Rerun instance (and be part of the same recording),
-    you will need to manually assign them all the same recording_id.
-    Any random UUIDv4 will work, or copy the recording id for the parent process.
-
-    Parameters
-    ----------
-    value : str
-        The recording ID to use for this process.
-
-    """
-    bindings.set_recording_id(value)
-
-
-def init(application_id: str, spawn: bool = False, default_enabled: bool = True, strict: bool = False) -> None:
+def init(
+    application_id: str,
+    recording_id: Optional[str] = None,
+    spawn: bool = False,
+    default_enabled: bool = True,
+    strict: bool = False,
+) -> None:
     """
     Initialize the Rerun SDK with a user-chosen application id (name).
 
@@ -144,6 +125,17 @@ def init(application_id: str, spawn: bool = False, default_enabled: bool = True,
     For example, if you have one application doing object detection
     and another doing camera calibration, you could have
     `rerun.init("object_detector")` and `rerun.init("calibrator")`.
+    recording_id : Optional[str]
+        Set the recording ID that this process is logging to, as a UUIDv4.
+
+        The default recording_id is based on `multiprocessing.current_process().authkey`
+        which means that all processes spawned with `multiprocessing`
+        will have the same default recording_id.
+
+        If you are not using `multiprocessing` and still want several different Python
+        processes to log to the same Rerun instance (and be part of the same recording),
+        you will need to manually assign them all the same recording_id.
+        Any random UUIDv4 will work, or copy the recording id for the parent process.
     spawn : bool
         Spawn a Rerun Viewer and stream logging data to it.
         Short for calling `spawn` separately.
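# [editor's note] A hedged usage sketch of the `init` API documented above:
# several processes can contribute to a single recording by passing the same
# explicit recording_id. The application name and UUID below are made-up
# placeholders, and each cooperating process runs the same two calls.
import rerun as rr

SHARED_RECORDING_ID = "9f3c1b2e-7d4a-4e52-a0a4-1c2b3d4e5f60"  # any UUIDv4 works

rr.init("shared_app", recording_id=SHARED_RECORDING_ID)
rr.connect()  # stream to the same viewer/recording as the other processes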
@@ -189,6 +181,7 @@ def init(application_id: str, spawn: bool = False, default_enabled: bool = True,
 
     bindings.init(
         application_id=application_id,
+        recording_id=recording_id,
         application_path=application_path,
         default_enabled=default_enabled,
     )
diff --git a/rerun_py/src/lib.rs b/rerun_py/src/lib.rs
index 587c7422a28f..73c8e4a368a5 100644
--- a/rerun_py/src/lib.rs
+++ b/rerun_py/src/lib.rs
@@ -15,4 +15,3 @@ static GLOBAL: AccountingAllocator =
 
 mod arrow;
 mod python_bridge;
-mod python_session;
diff --git a/rerun_py/src/python_bridge.rs b/rerun_py/src/python_bridge.rs
index d7485517047c..2e9a8cb19ab2 100644
--- a/rerun_py/src/python_bridge.rs
+++ b/rerun_py/src/python_bridge.rs
@@ -6,17 +6,17 @@ use std::{borrow::Cow, io::Cursor, path::PathBuf};
 
 use itertools::izip;
 use pyo3::{
-    exceptions::{PyRuntimeError, PyTypeError, PyValueError},
+    exceptions::{PyRuntimeError, PyTypeError},
     prelude::*,
     types::{PyBytes, PyDict},
 };
 
-use re_log_types::{ArrowMsg, DataRow, DataTableError};
+use re_log_types::DataRow;
 use rerun::{
     log::{PathOp, RowId},
     sink::MemorySinkStorage,
     time::{Time, TimeInt, TimePoint, TimeType, Timeline},
-    ApplicationId, EntityPath, RecordingId,
+    EntityPath, RecordingId, RecordingStream, RecordingStreamBuilder,
 };
 
 pub use rerun::{
@@ -35,88 +35,34 @@ use re_web_viewer_server::WebViewerServerPort;
 #[cfg(feature = "web_viewer")]
 use re_ws_comms::RerunServerPort;
 
-use crate::{arrow::get_registered_component_names, python_session::PythonSession};
+use crate::arrow::get_registered_component_names;
 
-// ----------------------------------------------------------------------------
+// --- FFI ---
 
-/// The global [`PythonSession`] object used by the Python API.
-fn python_session() -> parking_lot::MutexGuard<'static, PythonSession> {
+// TODO
+/// The global [`RecordingStream`] object used by the Python API.
+fn global_data_stream() -> parking_lot::MutexGuard<'static, Option<RecordingStream>> {
     use once_cell::sync::OnceCell;
     use parking_lot::Mutex;
-    static PYTHON_SESSION: OnceCell<Mutex<PythonSession>> = OnceCell::new();
-    PYTHON_SESSION.get_or_init(Default::default).lock()
+    static REC_CTX: OnceCell<Mutex<Option<RecordingStream>>> = OnceCell::new();
+    REC_CTX.get_or_init(Default::default).lock()
 }
 
-// ----------------------------------------------------------------------------
-
-/// Thread-local info
-#[derive(Default)]
-struct ThreadInfo {
-    /// The current time, which can be set by users.
-    time_point: TimePoint,
-}
-
-impl ThreadInfo {
-    pub fn thread_now() -> TimePoint {
-        Self::with(|ti| ti.now())
-    }
-
-    pub fn set_thread_time(timeline: Timeline, time_int: Option<TimeInt>) {
-        Self::with(|ti| ti.set_time(timeline, time_int));
-    }
-
-    pub fn reset_thread_time() {
-        Self::with(|ti| ti.reset_time());
-    }
-
-    /// Get access to the thread-local [`ThreadInfo`].
-    fn with<R>(f: impl FnOnce(&mut ThreadInfo) -> R) -> R {
-        use std::cell::RefCell;
-        thread_local! {
-            static THREAD_INFO: RefCell<Option<ThreadInfo>> = RefCell::new(None);
-        }
-
-        THREAD_INFO.with(|thread_info| {
-            let mut thread_info = thread_info.borrow_mut();
-            let thread_info = thread_info.get_or_insert_with(ThreadInfo::default);
-            f(thread_info)
-        })
-    }
-
-    fn now(&self) -> TimePoint {
-        let mut time_point = self.time_point.clone();
-        time_point.insert(Timeline::log_time(), Time::now().into());
-        time_point
-    }
-
-    fn set_time(&mut self, timeline: Timeline, time_int: Option<TimeInt>) {
-        if let Some(time_int) = time_int {
-            self.time_point.insert(timeline, time_int);
-        } else {
-            self.time_point.remove(&timeline);
-        }
-    }
-
-    fn reset_time(&mut self) {
-        self.time_point = TimePoint::default();
-    }
-}
-
-// ----------------------------------------------------------------------------
-
-fn python_version(py: Python<'_>) -> re_log_types::PythonVersion {
-    let py_version = py.version_info();
-    re_log_types::PythonVersion {
-        major: py_version.major,
-        minor: py_version.minor,
-        patch: py_version.patch,
-        suffix: py_version.suffix.map(|s| s.to_owned()).unwrap_or_default(),
-    }
+#[pyfunction]
+fn main(py: Python<'_>, argv: Vec<String>) -> PyResult<u8> {
+    let build_info = re_build_info::build_info!();
+    let call_src = rerun::CallSource::Python(python_version(py));
+    tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .unwrap()
+        .block_on(rerun::run(build_info, call_src, argv))
+        .map_err(|err| PyRuntimeError::new_err(re_error::format(err)))
 }
 
 /// The python module is called "rerun_bindings".
 #[pymodule]
-fn rerun_bindings(py: Python<'_>, m: &PyModule) -> PyResult<()> {
+fn rerun_bindings(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
     // NOTE: We do this here because some of the inner init methods don't respond too kindly to being
     // called more than once.
     re_log::setup_native_logging();
@@ -137,53 +83,122 @@ fn rerun_bindings(py: Python<'_>, m: &PyModule) -> PyResult<()> {
         return Ok(());
     }
 
-    python_session().set_python_version(python_version(py));
-
-    // NOTE: We do this here because we want child processes to share the same recording-id,
-    // whether the user has called `init` or not.
-    // See `default_recording_id` for extra information.
- python_session().set_recording_id(default_recording_id(py)); - - m.add_function(wrap_pyfunction!(get_recording_id, m)?)?; - m.add_function(wrap_pyfunction!(set_recording_id, m)?)?; + // init + m.add_function(wrap_pyfunction!(init, m)?)?; + m.add_function(wrap_pyfunction!(is_enabled, m)?)?; + m.add_function(wrap_pyfunction!(shutdown, m)?)?; + // sinks m.add_function(wrap_pyfunction!(connect, m)?)?; + m.add_function(wrap_pyfunction!(save, m)?)?; + m.add_function(wrap_pyfunction!(memory_recording, m)?)?; + m.add_function(wrap_pyfunction!(serve, m)?)?; m.add_function(wrap_pyfunction!(disconnect, m)?)?; m.add_function(wrap_pyfunction!(flush, m)?)?; - m.add_function(wrap_pyfunction!(get_app_url, m)?)?; - m.add_function(wrap_pyfunction!(init, m)?)?; - m.add_function(wrap_pyfunction!(is_enabled, m)?)?; - m.add_function(wrap_pyfunction!(memory_recording, m)?)?; - m.add_function(wrap_pyfunction!(save, m)?)?; m.add_function(wrap_pyfunction!(start_web_viewer_server, m)?)?; - m.add_function(wrap_pyfunction!(serve, m)?)?; - m.add_function(wrap_pyfunction!(set_enabled, m)?)?; - m.add_function(wrap_pyfunction!(shutdown, m)?)?; + // time m.add_function(wrap_pyfunction!(set_time_sequence, m)?)?; m.add_function(wrap_pyfunction!(set_time_seconds, m)?)?; m.add_function(wrap_pyfunction!(set_time_nanos, m)?)?; m.add_function(wrap_pyfunction!(reset_time, m)?)?; + // log transforms m.add_function(wrap_pyfunction!(log_unknown_transform, m)?)?; m.add_function(wrap_pyfunction!(log_rigid3, m)?)?; m.add_function(wrap_pyfunction!(log_pinhole, m)?)?; - m.add_function(wrap_pyfunction!(log_meshes, m)?)?; - + // log view coordinates m.add_function(wrap_pyfunction!(log_view_coordinates_xyz, m)?)?; m.add_function(wrap_pyfunction!(log_view_coordinates_up_handedness, m)?)?; + // log segmentation m.add_function(wrap_pyfunction!(log_annotation_context, m)?)?; + // log assets + m.add_function(wrap_pyfunction!(log_meshes, m)?)?; m.add_function(wrap_pyfunction!(log_mesh_file, m)?)?; m.add_function(wrap_pyfunction!(log_image_file, m)?)?; + + // log special m.add_function(wrap_pyfunction!(log_cleared, m)?)?; m.add_function(wrap_pyfunction!(log_arrow_msg, m)?)?; + // misc + m.add_function(wrap_pyfunction!(get_app_url, m)?)?; + m.add_function(wrap_pyfunction!(get_recording_id, m)?)?; + Ok(()) } +fn no_active_recording(origin: &str) { + re_log::debug!("No active recording - call to {origin}() ignored (have you called rr.init()?)",); +} + +// --- Init --- + +// TODO: actual python object for RecordingStream + make_default logic etc +#[pyfunction] +#[pyo3(signature = ( + application_id, + recording_id=None, + application_path=None, + default_enabled=true, +))] +fn init( + py: Python<'_>, + application_id: String, + recording_id: Option, + application_path: Option, + default_enabled: bool, +) -> PyResult<()> { + // TODO: pass this to the builder and let it do that logic + // The sentinel file we use to identify the official examples directory. + const SENTINEL_FILENAME: &str = ".rerun_examples"; + let is_official_example = application_path.map_or(false, |mut path| { + // more than 4 layers would be really pushing it + for _ in 0..4 { + path.pop(); // first iteration is always a file path in our examples + if path.join(SENTINEL_FILENAME).exists() { + return true; + } + } + false + }); + + let recording_id = if let Some(recording_id) = recording_id { + recording_id.parse().map_err(|_err| { + PyTypeError::new_err(format!( + "Invalid recording id - expected a UUID, got {recording_id:?}" + )) + })? 
+ } else { + default_recording_id(py) + }; + + let mut rec_stream = global_data_stream(); + *rec_stream = RecordingStreamBuilder::new(application_id) + .is_official_example(is_official_example) + .recording_id(recording_id) + .recording_source(re_log_types::RecordingSource::PythonSdk(python_version(py))) + .default_enabled(default_enabled) + .buffered() + .map_err(|err| PyRuntimeError::new_err(err.to_string()))? + .into(); + + Ok(()) +} + +fn python_version(py: Python<'_>) -> re_log_types::PythonVersion { + let py_version = py.version_info(); + re_log_types::PythonVersion { + major: py_version.major, + minor: py_version.minor, + patch: py_version.patch, + suffix: py_version.suffix.map(|s| s.to_owned()).unwrap_or_default(), + } +} + fn default_recording_id(py: Python<'_>) -> RecordingId { use rand::{Rng as _, SeedableRng as _}; use std::hash::{Hash as _, Hasher as _}; @@ -224,104 +239,80 @@ authkey = multiprocessing.current_process().authkey authkey.as_bytes().to_vec() } -// ---------------------------------------------------------------------------- - -fn parse_entity_path(entity_path: &str) -> PyResult { - let components = re_log_types::parse_entity_path(entity_path) - .map_err(|err| PyTypeError::new_err(err.to_string()))?; - if components.is_empty() { - Err(PyTypeError::new_err( - "You cannot log to the root {entity_path:?}", - )) - } else { - Ok(EntityPath::from(components)) - } -} - -fn time(timeless: bool) -> TimePoint { - if timeless { - TimePoint::timeless() - } else { - ThreadInfo::thread_now() - } +/// Is logging enabled in the global recording? +#[pyfunction] +fn is_enabled() -> bool { + global_data_stream() + .as_ref() + .map_or(false, |rec_stream| rec_stream.is_enabled()) } -// ---------------------------------------------------------------------------- - #[pyfunction] -fn main(py: Python<'_>, argv: Vec) -> PyResult { - let build_info = re_build_info::build_info!(); - let call_src = rerun::CallSource::Python(python_version(py)); - tokio::runtime::Builder::new_multi_thread() - .enable_all() - .build() - .unwrap() - .block_on(rerun::run(build_info, call_src, argv)) - .map_err(|err| PyRuntimeError::new_err(re_error::format(err))) +fn shutdown(py: Python<'_>) { + re_log::debug!("Shutting down the Rerun SDK"); + // Disconnect the current sink which ensures that + // it flushes and cleans up. + disconnect(py); } +// --- Sinks --- + #[pyfunction] -fn get_recording_id() -> PyResult { - let recording_id = python_session().recording_id(); +fn connect(addr: Option) -> PyResult<()> { + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + no_active_recording("connect"); + return Ok(()); + }; - if recording_id == RecordingId::ZERO { - Err(PyTypeError::new_err("module has not been initialized")) + let addr = if let Some(addr) = addr { + addr.parse()? } else { - Ok(recording_id.to_string()) - } -} + rerun::default_server_addr() + }; -#[pyfunction] -fn set_recording_id(recording_id: &str) -> PyResult<()> { - if let Ok(recording_id) = recording_id.parse() { - python_session().set_recording_id(recording_id); - Ok(()) - } else { - Err(PyTypeError::new_err(format!( - "Invalid recording id - expected a UUID, got {recording_id:?}" - ))) - } + rec_stream.connect(addr); + + Ok(()) } #[pyfunction] -#[pyo3(signature = (application_id, application_path=None, default_enabled=true))] -fn init(application_id: String, application_path: Option, default_enabled: bool) { - // The sentinel file we use to identify the official examples directory. 
- const SENTINEL_FILENAME: &str = ".rerun_examples"; - let is_official_example = application_path.map_or(false, |mut path| { - // more than 4 layers would be really pushing it - for _ in 0..4 { - path.pop(); // first iteration is always a file path in our examples - if path.join(SENTINEL_FILENAME).exists() { - return true; - } - } - false - }); +fn save(path: &str) -> PyResult<()> { + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + no_active_recording("save"); + return Ok(()); + }; - let mut session = python_session(); - session.set_default_enabled(default_enabled); - session.set_application_id(ApplicationId(application_id), is_official_example); + rec_stream + .save(path) + .map_err(|err| PyRuntimeError::new_err(err.to_string())) } +/// Create an in-memory rrd file #[pyfunction] -fn connect(addr: Option) -> PyResult<()> { - let addr = if let Some(addr) = addr { - addr.parse()? - } else { - rerun::default_server_addr() +fn memory_recording() -> PyMemorySinkStorage { + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + no_active_recording("memory_recording"); + return Default::default(); }; - python_session().connect(addr); - Ok(()) + + PyMemorySinkStorage(rec_stream.memory()) } -#[must_use = "the tokio_runtime guard must be kept alive while using tokio"] -#[cfg(feature = "web_viewer")] -fn enter_tokio_runtime() -> tokio::runtime::EnterGuard<'static> { - use once_cell::sync::Lazy; - static TOKIO_RUNTIME: Lazy = - Lazy::new(|| tokio::runtime::Runtime::new().expect("Failed to create tokio runtime")); - TOKIO_RUNTIME.enter() +#[pyclass] +#[derive(Default)] +struct PyMemorySinkStorage(MemorySinkStorage); + +#[pymethods] +impl PyMemorySinkStorage { + fn get_rrd_as_bytes<'p>(&self, py: Python<'p>) -> PyResult<&'p PyBytes> { + self.0 + .rrd_as_bytes() + .map(|bytes| PyBytes::new(py, bytes.as_slice())) + .map_err(|err| PyRuntimeError::new_err(err.to_string())) + } } /// Serve a web-viewer. 
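// [editor's note] A self-contained sketch of the lazy-global pattern that the
// sink functions above all build on, with a plain `String` standing in for
// `RecordingStream`; it uses the same `once_cell` and `parking_lot` crates as
// `global_data_stream()` earlier in this file.
use once_cell::sync::OnceCell;
use parking_lot::{Mutex, MutexGuard};

fn global_stream() -> MutexGuard<'static, Option<String>> {
    static STREAM: OnceCell<Mutex<Option<String>>> = OnceCell::new();
    STREAM.get_or_init(Default::default).lock()
}

fn main() {
    // Before `init`: the Option is None, so every sink call logs and bails out.
    assert!(global_stream().is_none());
    // What `init` effectively does: store a freshly built stream in the global.
    *global_stream() = Some("my_recording".to_owned());
    assert!(global_stream().is_some());
}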
@@ -330,16 +321,24 @@ fn enter_tokio_runtime() -> tokio::runtime::EnterGuard<'static> { fn serve(open_browser: bool, web_port: Option, ws_port: Option) -> PyResult<()> { #[cfg(feature = "web_viewer")] { - let mut session = python_session(); + #[must_use = "the tokio_runtime guard must be kept alive while using tokio"] + fn enter_tokio_runtime() -> tokio::runtime::EnterGuard<'static> { + use once_cell::sync::Lazy; + static TOKIO_RUNTIME: Lazy = Lazy::new(|| { + tokio::runtime::Runtime::new().expect("Failed to create tokio runtime") + }); + TOKIO_RUNTIME.enter() + } - if !session.is_enabled() { - re_log::debug!("Rerun disabled - call to serve() ignored"); + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + no_active_recording("serve"); return Ok(()); - } + }; let _guard = enter_tokio_runtime(); - session.set_sink( + rec_stream.set_sink( rerun::web_viewer::new_sink( open_browser, web_port.map(WebViewerServerPort).unwrap_or_default(), @@ -362,108 +361,78 @@ fn serve(open_browser: bool, web_port: Option, ws_port: Option) -> PyR } } -#[pyfunction] -// TODO(jleibs) expose this as a python type -fn start_web_viewer_server(port: u16) -> PyResult<()> { - #[cfg(feature = "web_viewer")] - { - let mut session = python_session(); - let _guard = enter_tokio_runtime(); - session - .start_web_viewer_server(WebViewerServerPort(port)) - .map_err(|err| PyRuntimeError::new_err(err.to_string())) - } - - #[cfg(not(feature = "web_viewer"))] - { - _ = port; - Err(PyRuntimeError::new_err( - "The Rerun SDK was not compiled with the 'web_viewer' feature", - )) - } -} - -#[pyfunction] -fn get_app_url() -> String { - let session = python_session(); - session.get_app_url() -} - -#[pyfunction] -fn shutdown(py: Python<'_>) { - re_log::debug!("Shutting down the Rerun SDK"); - // Disconnect the current sink which ensures that - // it flushes and cleans up. - disconnect(py); -} - -/// Is logging enabled in the global session? -#[pyfunction] -fn is_enabled() -> bool { - python_session().is_enabled() -} - -/// Enable or disable logging in the global session. +/// Disconnect from remote server (if any). /// -/// This is a global setting that affects all threads. -/// By default logging is enabled. -#[pyfunction] -fn set_enabled(enabled: bool) { - python_session().set_enabled(enabled); -} - -/// Block until outstanding data has been flushed to the sink +/// Subsequent log messages will be buffered and either sent on the next call to `connect`, +/// or shown with `show`. #[pyfunction] -fn flush(py: Python<'_>) { +fn disconnect(py: Python<'_>) { // Release the GIL in case any flushing behavior needs to // cleanup a python object. py.allow_threads(|| { - python_session().flush(); + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + no_active_recording("disconnect"); + return; + }; + rec_stream.disconnect(); }); } -/// Disconnect from remote server (if any). -/// -/// Subsequent log messages will be buffered and either sent on the next call to `connect`, -/// or shown with `show`. +// TODO +/// Block until outstanding data has been flushed to the sink #[pyfunction] -fn disconnect(py: Python<'_>) { +fn flush(py: Python<'_>) { // Release the GIL in case any flushing behavior needs to // cleanup a python object. 
py.allow_threads(|| { - python_session().disconnect(); + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + no_active_recording("flush"); + return; + }; + rec_stream.flush_blocking(); }); } #[pyfunction] -fn save(path: &str) -> PyResult<()> { - let mut session = python_session(); - session - .save(path) - .map_err(|err| PyRuntimeError::new_err(err.to_string())) -} - -#[pyclass] -struct PyMemorySinkStorage(MemorySinkStorage); - -#[pymethods] -impl PyMemorySinkStorage { - fn get_rrd_as_bytes<'p>(&self, py: Python<'p>) -> PyResult<&'p PyBytes> { - self.0 - .rrd_as_bytes() - .map(|bytes| PyBytes::new(py, bytes.as_slice())) - .map_err(|err| PyRuntimeError::new_err(err.to_string())) - } -} +// TODO(jleibs) expose this as a python type +fn start_web_viewer_server(port: u16) -> PyResult<()> { + Ok(()) -/// Create an in-memory rrd file -#[pyfunction] -fn memory_recording() -> PyMemorySinkStorage { - let mut session = python_session(); - PyMemorySinkStorage(session.memory_recording()) + // /// Start a web server to host the run web-assets + // /// + // /// The caller needs to ensure that there is a `tokio` runtime running. + // #[allow(clippy::unnecessary_wraps)] + // #[cfg(feature = "web_viewer")] + // pub fn start_web_viewer_server( + // &mut self, + // _web_port: WebViewerServerPort, + // ) -> Result<(), PythonSessionError> { + // self.web_viewer_server = Some(re_web_viewer_server::WebViewerServerHandle::new(_web_port)?); + + // Ok(()) + // } + + // #[cfg(feature = "web_viewer")] + // { + // let mut session = python_session(); + // let _guard = enter_tokio_runtime(); + // session + // .start_web_viewer_server(WebViewerServerPort(port)) + // .map_err(|err| PyRuntimeError::new_err(err.to_string())) + // } + + // #[cfg(not(feature = "web_viewer"))] + // { + // _ = port; + // Err(PyRuntimeError::new_err( + // "The Rerun SDK was not compiled with the 'web_viewer' feature", + // )) + // } } -// ---------------------------------------------------------------------------- +// --- Time --- /// Set the current time globally. Used for all subsequent logging, /// until the next call to `set_time_sequence`. @@ -495,21 +464,66 @@ fn set_time_nanos(timeline: &str, ns: Option) { ); } -#[pyfunction] -fn reset_time() { - ThreadInfo::reset_thread_time(); -} +#[pyfunction] +fn reset_time() { + ThreadInfo::reset_thread_time(); +} + +/// Thread-local info +#[derive(Default)] +struct ThreadInfo { + /// The current time, which can be set by users. + time_point: TimePoint, +} + +impl ThreadInfo { + pub fn thread_now() -> TimePoint { + Self::with(|ti| ti.now()) + } + + pub fn set_thread_time(timeline: Timeline, time_int: Option) { + Self::with(|ti| ti.set_time(timeline, time_int)); + } + + pub fn reset_thread_time() { + Self::with(|ti| ti.reset_time()); + } + + /// Get access to the thread-local [`ThreadInfo`]. + fn with(f: impl FnOnce(&mut ThreadInfo) -> R) -> R { + use std::cell::RefCell; + thread_local! 
{ + static THREAD_INFO: RefCell> = RefCell::new(None); + } + + THREAD_INFO.with(|thread_info| { + let mut thread_info = thread_info.borrow_mut(); + let thread_info = thread_info.get_or_insert_with(ThreadInfo::default); + f(thread_info) + }) + } + + fn now(&self) -> TimePoint { + let mut time_point = self.time_point.clone(); + time_point.insert(Timeline::log_time(), Time::now().into()); + time_point + } + + fn set_time(&mut self, timeline: Timeline, time_int: Option) { + if let Some(time_int) = time_int { + self.time_point.insert(timeline, time_int); + } else { + self.time_point.remove(&timeline); + } + } -fn convert_color(color: Vec) -> PyResult<[u8; 4]> { - match &color[..] { - [r, g, b] => Ok([*r, *g, *b, 255]), - [r, g, b, a] => Ok([*r, *g, *b, *a]), - _ => Err(PyTypeError::new_err(format!( - "Expected color to be of length 3 or 4, got {color:?}" - ))), + fn reset_time(&mut self) { + self.time_point = TimePoint::default(); } } +// --- Log transforms --- + #[pyfunction] fn log_unknown_transform(entity_path: &str, timeless: bool) -> PyResult<()> { let transform = re_log_types::Transform::Unknown; @@ -559,11 +573,16 @@ fn log_transform( transform: re_log_types::Transform, timeless: bool, ) -> PyResult<()> { + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + no_active_recording("log_transform"); + return Ok(()); + }; + let entity_path = parse_entity_path(entity_path)?; if entity_path.is_root() { return Err(PyTypeError::new_err("Transforms are between a child entity and its parent, so the root cannot have a transform")); } - let mut session = python_session(); let time_point = time(timeless); // We currently log arrow transforms from inside the bridge because we are @@ -580,10 +599,12 @@ fn log_transform( [transform].as_slice(), ); - session.send_row(row) + record_row(rec_stream, row); + + Ok(()) } -// ---------------------------------------------------------------------------- +// --- Log view coordinates --- #[pyfunction] #[pyo3(signature = (entity_path, xyz, right_handed = None, timeless = false))] @@ -630,6 +651,12 @@ fn log_view_coordinates( coordinates: ViewCoordinates, timeless: bool, ) -> PyResult<()> { + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + re_log::debug!("No active recording - call to log_view_coordinates() ignored"); + return Ok(()); + }; + if coordinates.handedness() == Some(Handedness::Left) { re_log::warn_once!("Left-handed coordinate systems are not yet fully supported by Rerun"); } @@ -641,7 +668,6 @@ fn log_view_coordinates( parse_entity_path(entity_path_str)? 
}; - let mut session = python_session(); let time_point = time(timeless); // We currently log view coordinates from inside the bridge because the code @@ -658,22 +684,93 @@ fn log_view_coordinates( [coordinates].as_slice(), ); - session.send_row(row) + record_row(rec_stream, row); + + Ok(()) } -// ---------------------------------------------------------------------------- +// --- Log segmentation --- -// TODO(jleibs): This shadows [`re_log_types::TensorDataMeaning`] -// -#[pyclass] -#[derive(Clone, Debug)] -enum TensorDataMeaning { - Unknown, - ClassId, - Depth, +#[derive(FromPyObject)] +struct AnnotationInfoTuple(u16, Option, Option>); + +impl From for AnnotationInfo { + fn from(tuple: AnnotationInfoTuple) -> Self { + let AnnotationInfoTuple(id, label, color) = tuple; + Self { + id, + label: label.map(Label), + color: color + .as_ref() + .map(|color| convert_color(color.clone()).unwrap()) + .map(|bytes| bytes.into()), + } + } +} + +type ClassDescriptionTuple = (AnnotationInfoTuple, Vec, Vec); + +#[pyfunction] +fn log_annotation_context( + entity_path_str: &str, + class_descriptions: Vec, + timeless: bool, +) -> PyResult<()> { + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + re_log::debug!("No active recording - call to log_annotation_context() ignored"); + return Ok(()); + }; + + // We normally disallow logging to root, but we make an exception for class_descriptions + let entity_path = if entity_path_str == "/" { + EntityPath::root() + } else { + parse_entity_path(entity_path_str)? + }; + + let mut annotation_context = AnnotationContext::default(); + + for (info, keypoint_annotations, keypoint_skeleton_edges) in class_descriptions { + annotation_context + .class_map + .entry(ClassId(info.0)) + .or_insert_with(|| ClassDescription { + info: info.into(), + keypoint_map: keypoint_annotations + .into_iter() + .map(|k| (KeypointId(k.0), k.into())) + .collect(), + keypoint_connections: keypoint_skeleton_edges + .chunks_exact(2) + .map(|pair| (KeypointId(pair[0]), KeypointId(pair[1]))) + .collect(), + }); + } + + let time_point = time(timeless); + + // We currently log AnnotationContext from inside the bridge because it's a + // fairly complex type with a need for a fair amount of data-validation. We + // already have the serialization implemented in rust so we start with this + // implementation. + // + // TODO(jleibs) replace with python-native implementation + + let row = DataRow::from_cells1( + RowId::random(), + entity_path, + time_point, + 1, + [annotation_context].as_slice(), + ); + + record_row(rec_stream, row); + + Ok(()) } -// ---------------------------------------------------------------------------- +// --- Log assets --- #[pyfunction] fn log_meshes( @@ -685,6 +782,12 @@ fn log_meshes( albedo_factors: Vec>>, timeless: bool, ) -> PyResult<()> { + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + no_active_recording("log_meshes"); + return Ok(()); + }; + let entity_path = parse_entity_path(entity_path_str)?; // Make sure we have as many position buffers as index buffers, etc. 
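// [editor's note] A self-contained sketch of the keypoint-edge pairing done in
// `log_annotation_context` above: a flat `[a, b, a, b, …]` id buffer becomes
// `(a, b)` connections, and `chunks_exact(2)` silently drops a trailing
// unpaired id.
fn pair_keypoint_edges(flat: &[u16]) -> Vec<(u16, u16)> {
    flat.chunks_exact(2).map(|pair| (pair[0], pair[1])).collect()
}

fn main() {
    assert_eq!(pair_keypoint_edges(&[0, 1, 1, 2, 5]), vec![(0, 1), (1, 2)]);
}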
@@ -704,8 +807,6 @@ fn log_meshes( ))); } - let mut session = python_session(); - let time_point = time(timeless); let mut meshes = Vec::with_capacity(position_buffers.len()); @@ -786,7 +887,9 @@ fn log_meshes( meshes, ); - session.send_row(row) + record_row(rec_stream, row); + + Ok(()) } #[pyfunction] @@ -797,6 +900,12 @@ fn log_mesh_file( transform: numpy::PyReadonlyArray2<'_, f32>, timeless: bool, ) -> PyResult<()> { + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + no_active_recording("log_mesh_file"); + return Ok(()); + }; + let entity_path = parse_entity_path(entity_path_str)?; let format = match mesh_format { "GLB" => MeshFormat::Glb, @@ -835,8 +944,6 @@ fn log_mesh_file( ] }; - let mut session = python_session(); - let time_point = time(timeless); let mesh3d = Mesh3D::Encoded(EncodedMesh3D { @@ -861,7 +968,9 @@ fn log_mesh_file( [mesh3d].as_slice(), ); - session.send_row(row) + record_row(rec_stream, row); + + Ok(()) } /// Log an image file given its contents or path on disk. @@ -876,6 +985,12 @@ fn log_image_file( img_format: Option<&str>, timeless: bool, ) -> PyResult<()> { + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + no_active_recording("log_image_file"); + return Ok(()); + }; + let entity_path = parse_entity_path(entity_path)?; let img_bytes = match (img_bytes, img_path) { @@ -923,8 +1038,6 @@ fn log_image_file( } }; - let mut session = python_session(); - let time_point = time(timeless); let tensor = re_log_types::component_types::Tensor { @@ -947,90 +1060,34 @@ fn log_image_file( [tensor].as_slice(), ); - session.send_row(row) -} + record_row(rec_stream, row); -#[derive(FromPyObject)] -struct AnnotationInfoTuple(u16, Option, Option>); + Ok(()) +} -impl From for AnnotationInfo { - fn from(tuple: AnnotationInfoTuple) -> Self { - let AnnotationInfoTuple(id, label, color) = tuple; - Self { - id, - label: label.map(Label), - color: color - .as_ref() - .map(|color| convert_color(color.clone()).unwrap()) - .map(|bytes| bytes.into()), - } - } +// TODO(jleibs): This shadows [`re_log_types::TensorDataMeaning`] +#[pyclass] +#[derive(Clone, Debug)] +enum TensorDataMeaning { + Unknown, + ClassId, + Depth, } -type ClassDescriptionTuple = (AnnotationInfoTuple, Vec, Vec); +// --- Log special --- #[pyfunction] -fn log_annotation_context( - entity_path_str: &str, - class_descriptions: Vec, - timeless: bool, -) -> PyResult<()> { - let mut session = python_session(); - - // We normally disallow logging to root, but we make an exception for class_descriptions - let entity_path = if entity_path_str == "/" { - EntityPath::root() - } else { - parse_entity_path(entity_path_str)? 
+fn log_cleared(entity_path: &str, recursive: bool) -> PyResult<()> { + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + no_active_recording("log_cleared"); + return Ok(()); }; - let mut annotation_context = AnnotationContext::default(); - - for (info, keypoint_annotations, keypoint_skeleton_edges) in class_descriptions { - annotation_context - .class_map - .entry(ClassId(info.0)) - .or_insert_with(|| ClassDescription { - info: info.into(), - keypoint_map: keypoint_annotations - .into_iter() - .map(|k| (KeypointId(k.0), k.into())) - .collect(), - keypoint_connections: keypoint_skeleton_edges - .chunks_exact(2) - .map(|pair| (KeypointId(pair[0]), KeypointId(pair[1]))) - .collect(), - }); - } - - let time_point = time(timeless); - - // We currently log AnnotationContext from inside the bridge because it's a - // fairly complex type with a need for a fair amount of data-validation. We - // already have the serialization implemented in rust so we start with this - // implementation. - // - // TODO(jleibs) replace with python-native implementation - - let row = DataRow::from_cells1( - RowId::random(), - entity_path, - time_point, - 1, - [annotation_context].as_slice(), - ); - - session.send_row(row) -} - -#[pyfunction] -fn log_cleared(entity_path: &str, recursive: bool) -> PyResult<()> { let entity_path = parse_entity_path(entity_path)?; - let mut session = python_session(); + let timepoint = time(false); - let time_point = time(false); - - session.send_path_op(&time_point, PathOp::clear(recursive, entity_path)); + rec_stream.record_path_op(timepoint, PathOp::clear(recursive, entity_path)); Ok(()) } @@ -1045,18 +1102,68 @@ fn log_arrow_msg(entity_path: &str, components: &PyDict, timeless: bool) -> PyRe let data_table = crate::arrow::build_data_table_from_components(&entity_path, components, &time(timeless))?; - let mut session = python_session(); - - let msg: ArrowMsg = data_table - .to_arrow_msg() - .map_err(|err: DataTableError| PyValueError::new_err(err.to_string()))?; + let rec_stream = global_data_stream(); + let Some(rec_stream) = rec_stream.as_ref() else { + no_active_recording("log_arrow_msg"); + return Ok(()); + }; - session.send_arrow_msg(msg); + // TODO: do this well + for row in data_table.to_rows() { + record_row(rec_stream, row); + } Ok(()) } -// ---------------------------------------------------------------------------- +// --- Misc --- + +#[pyfunction] +fn get_recording_id() -> PyResult { + let recording_id = global_data_stream() + .as_ref() + .and_then(|rec_stream| rec_stream.recording_info().map(|info| info.recording_id)); + + match recording_id { + Some(id) => Ok(id.to_string()), + None => Err(PyTypeError::new_err("module has not been initialized")), + } +} + +#[pyfunction] +fn get_app_url() -> String { + // TODO + "TODO".to_owned() + + // /// Get a url to an instance of the web-viewer + // /// + // /// This may point to app.rerun.io or localhost depending on + // /// whether `host_assets` was called. + // pub fn get_app_url(&self) -> String { + // #[cfg(feature = "web_viewer")] + // if let Some(hosted_assets) = &self.web_viewer_server { + // return format!("http://localhost:{}", hosted_assets.port()); + // } + + // let short_git_hash = &self.build_info.git_hash[..7]; + // format!("https://app.rerun.io/commit/{short_git_hash}") + // } + + // let session = python_session(); + // session.get_app_url() +} + +// --- Helpers --- + +fn convert_color(color: Vec) -> PyResult<[u8; 4]> { + match &color[..] 
{ + [r, g, b] => Ok([*r, *g, *b, 255]), + [r, g, b, a] => Ok([*r, *g, *b, *a]), + _ => Err(PyTypeError::new_err(format!( + "Expected color to be of length 3 or 4, got {color:?}" + ))), + } +} fn slice_from_np_array<'a, T: numpy::Element, D: numpy::ndarray::Dimension>( array: &'a numpy::PyReadonlyArray<'_, T, D>, @@ -1074,3 +1181,27 @@ fn slice_from_np_array<'a, T: numpy::Element, D: numpy::ndarray::Dimension>( Cow::Owned(array.iter().cloned().collect()) } + +fn record_row(rec_stream: &RecordingStream, row: DataRow) { + rec_stream.record_row(row); +} + +fn parse_entity_path(entity_path: &str) -> PyResult { + let components = re_log_types::parse_entity_path(entity_path) + .map_err(|err| PyTypeError::new_err(err.to_string()))?; + if components.is_empty() { + Err(PyTypeError::new_err( + "You cannot log to the root {entity_path:?}", + )) + } else { + Ok(EntityPath::from(components)) + } +} + +fn time(timeless: bool) -> TimePoint { + if timeless { + TimePoint::timeless() + } else { + ThreadInfo::thread_now() + } +} diff --git a/rerun_py/src/python_session.rs b/rerun_py/src/python_session.rs deleted file mode 100644 index fe56aa0e9a7a..000000000000 --- a/rerun_py/src/python_session.rs +++ /dev/null @@ -1,341 +0,0 @@ -use std::net::SocketAddr; - -use pyo3::{exceptions::PyValueError, PyResult}; -use re_log_types::{ - ApplicationId, ArrowMsg, BeginRecordingMsg, DataRow, DataTableError, LogMsg, PathOp, - RecordingId, RecordingInfo, RecordingSource, RowId, Time, TimePoint, -}; - -#[cfg(feature = "web_viewer")] -use re_web_viewer_server::WebViewerServerPort; -use rerun::sink::LogSink; -// ---------------------------------------------------------------------------- - -#[derive(thiserror::Error, Debug)] -pub enum PythonSessionError { - #[allow(dead_code)] - #[error("The Rerun SDK was not compiled with the '{0}' feature")] - FeatureNotEnabled(&'static str), - - #[cfg(feature = "web_viewer")] - #[error("Could not start the WebViewerServer: '{0}'")] - WebViewerServerError(#[from] re_web_viewer_server::WebViewerServerError), -} - -/// Used to construct a [`RecordingInfo`]: -struct RecordingMetaData { - recording_source: RecordingSource, - application_id: Option, - recording_id: RecordingId, - is_official_example: Option, -} - -impl Default for RecordingMetaData { - fn default() -> Self { - Self { - // Will be filled in when we initialize the `rerun` python module. - recording_source: RecordingSource::Unknown, - application_id: Default::default(), - // TODO(https://github.com/rerun-io/rerun/issues/1792): ZERO is not a great choice - // here. Ideally we would use `default_recording_id(py)` instead. - recording_id: RecordingId::ZERO, - is_official_example: Default::default(), - } - } -} - -impl RecordingMetaData { - pub fn to_recording_info(&self) -> RecordingInfo { - let recording_id = self.recording_id; - - let application_id = self - .application_id - .clone() - .unwrap_or_else(ApplicationId::unknown); - - RecordingInfo { - application_id, - recording_id, - is_official_example: self.is_official_example.unwrap_or(false), - started: Time::now(), - recording_source: self.recording_source.clone(), - } - } -} - -// ---------------------------------------------------------------------------- - -/// The python API bindings create a single [`PythonSession`] -/// which is used to send log messages. -/// -/// This mirrors the Python API to a certain extent, allowing users -/// to set enable/disable logging, set application id, switch log sinks, etc. -pub struct PythonSession { - /// Is this session enabled? 
- /// If not, all calls into it are ignored! - enabled: bool, - - has_sent_begin_recording_msg: bool, - recording_meta_data: RecordingMetaData, - - /// Where we put the log messages. - sink: Box, - - build_info: re_build_info::BuildInfo, - - /// Used to serve the web viewer assets. - /// TODO(jleibs): Potentially use this for serve as well - #[cfg(feature = "web_viewer")] - web_viewer_server: Option, -} - -impl Default for PythonSession { - fn default() -> Self { - let default_enabled = true; - Self { - enabled: rerun::decide_logging_enabled(default_enabled), - has_sent_begin_recording_msg: false, - recording_meta_data: Default::default(), - sink: Box::new(rerun::sink::BufferedSink::new()), - build_info: re_build_info::build_info!(), - #[cfg(feature = "web_viewer")] - web_viewer_server: None, - } - } -} - -impl PythonSession { - pub fn set_python_version(&mut self, python_version: re_log_types::PythonVersion) { - self.recording_meta_data.recording_source = - re_log_types::RecordingSource::PythonSdk(python_version); - } - - /// Check if logging is enabled on this `Session`. - pub fn is_enabled(&self) -> bool { - self.enabled - } - - /// Enable or disable logging on this `Session`. - pub fn set_enabled(&mut self, enabled: bool) { - self.enabled = enabled; - } - - /// Set whether or not logging is enabled by default. - /// This will be overridden by the `RERUN` environment variable, if found. - pub fn set_default_enabled(&mut self, default_enabled: bool) { - self.enabled = rerun::decide_logging_enabled(default_enabled); - } - - /// Set the [`ApplicationId`] to use for the following stream of log messages. - /// - /// This should be called once before anything else. - /// If you don't call this, the resulting application id will be [`ApplicationId::unknown`]. - /// - /// Note that many recordings can share the same [`ApplicationId`], but - /// they all have unique [`RecordingId`]s. - pub fn set_application_id(&mut self, application_id: ApplicationId, is_official_example: bool) { - if self.recording_meta_data.application_id.as_ref() != Some(&application_id) { - self.recording_meta_data.application_id = Some(application_id); - self.recording_meta_data.is_official_example = Some(is_official_example); - self.has_sent_begin_recording_msg = false; - } - } - - /// The current [`RecordingId`]. - pub fn recording_id(&self) -> RecordingId { - self.recording_meta_data.recording_id - } - - /// Set the [`RecordingId`] of this message stream. - /// - /// If you're logging from multiple processes and want all the messages - /// to end up as the same recording, you must make sure they all set the same - /// [`RecordingId`] using this function. - /// - /// Note that many recordings can share the same [`ApplicationId`], but - /// they all have unique [`RecordingId`]s. - pub fn set_recording_id(&mut self, recording_id: RecordingId) { - if self.recording_meta_data.recording_id != recording_id { - self.recording_meta_data.recording_id = recording_id; - self.has_sent_begin_recording_msg = false; - } - } - - /// Set the [`LogSink`] to use. This is where the log messages will be sent. - /// - /// If the previous sink is [`rerun::sink::BufferedSink`] (the default), - /// it will be drained and sent to the new sink. - pub fn set_sink(&mut self, sink: Box) { - // Capture the backlog (should only apply if this was a `BufferedSink`) - let backlog = self.sink.drain_backlog(); - - // Before changing the sink, we set drop_if_disconnected and - // flush. 
This ensures that any messages that are currently - // buffered will be sent. - self.sink.drop_if_disconnected(); - self.sink.flush_blocking(); - self.sink = sink; - - if backlog.is_empty() { - // If we had no backlog, we need to send the `BeginRecording` message to the new sink. - self.has_sent_begin_recording_msg = false; - } else { - // Otherwise the backlog should have had the `BeginRecording` message in it already. - self.sink.send_all(backlog); - } - } - - /// Send log data to a remote viewer/server. - /// - /// Usually this is done by running the `rerun` binary (`cargo install rerun`) without arguments, - /// and then connecting to it. - /// - /// Send all currently buffered messages. - /// If we are already connected, we will re-connect to this new address. - /// - /// This function returns immediately. - /// Disconnect with [`Self::disconnect`]. - pub fn connect(&mut self, addr: SocketAddr) { - if !self.enabled { - re_log::debug!("Rerun disabled - call to connect() ignored"); - return; - } - - re_log::debug!("Connecting to remote {addr}…"); - self.set_sink(Box::new(rerun::sink::TcpSink::new(addr))); - } - - /// Send all pending and future log messages to disk as an rrd file - pub fn save( - &mut self, - path: impl Into, - ) -> Result<(), rerun::sink::FileSinkError> { - if !self.enabled { - re_log::debug!("Rerun disabled - call to save() ignored"); - return Ok(()); - } - - self.set_sink(Box::new(rerun::sink::FileSink::new(path)?)); - Ok(()) - } - - /// Send all pending and future log messages to an in-memory store - pub fn memory_recording(&mut self) -> rerun::sink::MemorySinkStorage { - if !self.enabled { - re_log::debug!("Rerun disabled - call to memory_recording() ignored"); - return Default::default(); - } - - let memory_sink = rerun::sink::MemorySink::default(); - let buffer = memory_sink.buffer(); - - self.set_sink(Box::new(memory_sink)); - self.has_sent_begin_recording_msg = false; - - buffer - } - - /// Disconnects any TCP connection, shuts down any server, and closes any file. - pub fn disconnect(&mut self) { - self.set_sink(Box::new(rerun::sink::BufferedSink::new())); - self.has_sent_begin_recording_msg = false; - } - - /// Wait until all logged data have been sent to the remove server (if any). - pub fn flush(&mut self) { - self.sink.flush_blocking(); - } - - /// Send a single [`DataRow`]. - pub fn send_row(&mut self, row: DataRow) -> PyResult<()> { - let msg = row - .into_table() - .to_arrow_msg() - .map_err(|err: DataTableError| PyValueError::new_err(err.to_string()))?; - - self.send(LogMsg::ArrowMsg(self.recording_id(), msg)); - - Ok(()) - } - - /// Send a [`LogMsg`]. - pub fn send(&mut self, log_msg: LogMsg) { - if !self.enabled { - // It's intended that the logging SDK should drop messages earlier than this if logging is disabled. This - // check here is just a safety net. - re_log::debug_once!("Logging is disabled, dropping message."); - return; - } - - if !self.has_sent_begin_recording_msg { - let info = self.recording_meta_data.to_recording_info(); - - // This shouldn't happen, but at least log an error if it does. - // See: https://github.com/rerun-io/rerun/issues/1792 - if info.recording_id == RecordingId::ZERO { - re_log::error_once!("RecordingId was still ZERO when sent to server. 
This is a python initialization bug."); - } - - re_log::debug!( - "Beginning new recording with application_id {:?} and recording id {}", - info.application_id.0, - info.recording_id - ); - - self.sink.send( - BeginRecordingMsg { - row_id: RowId::random(), - info, - } - .into(), - ); - self.has_sent_begin_recording_msg = true; - } - - self.sink.send(log_msg); - } - - pub fn send_arrow_msg(&mut self, arrow_msg: ArrowMsg) { - self.send(LogMsg::ArrowMsg(self.recording_id(), arrow_msg)); - } - - /// Send a [`PathOp`]. - pub fn send_path_op(&mut self, time_point: &TimePoint, path_op: PathOp) { - self.send(LogMsg::EntityPathOpMsg( - self.recording_id(), - re_log_types::EntityPathOpMsg { - row_id: RowId::random(), - time_point: time_point.clone(), - path_op, - }, - )); - } - - /// Get a url to an instance of the web-viewer - /// - /// This may point to app.rerun.io or localhost depending on - /// whether `host_assets` was called. - pub fn get_app_url(&self) -> String { - #[cfg(feature = "web_viewer")] - if let Some(hosted_assets) = &self.web_viewer_server { - return format!("http://localhost:{}", hosted_assets.port()); - } - - let short_git_hash = &self.build_info.git_hash[..7]; - format!("https://app.rerun.io/commit/{short_git_hash}") - } - - /// Start a web server to host the run web-asserts - /// - /// The caller needs to ensure that there is a `tokio` runtime running. - #[allow(clippy::unnecessary_wraps)] - #[cfg(feature = "web_viewer")] - pub fn start_web_viewer_server( - &mut self, - _web_port: WebViewerServerPort, - ) -> Result<(), PythonSessionError> { - self.web_viewer_server = Some(re_web_viewer_server::WebViewerServerHandle::new(_web_port)?); - - Ok(()) - } -} From 71a31bd39b107fc8e001168d99b436c3e5e85a44 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Thu, 27 Apr 2023 20:33:21 +0200 Subject: [PATCH 13/31] clean up old stuff from the before time --- crates/re_arrow_store/src/polars_util.rs | 2 +- crates/re_arrow_store/src/store_polars.rs | 2 +- crates/re_arrow_store/src/store_write.rs | 5 ++--- crates/re_arrow_store/tests/data_store.rs | 2 -- crates/re_data_store/src/log_db.rs | 2 +- crates/re_log_types/src/arrow_msg.rs | 3 --- crates/re_log_types/src/data_cell.rs | 2 +- crates/re_log_types/src/data_row.rs | 13 ++----------- crates/re_log_types/src/data_table.rs | 6 ------ crates/re_log_types/src/datagen.rs | 2 +- crates/re_log_types/src/lib.rs | 12 ------------ crates/re_sdk/src/msg_sender.rs | 14 -------------- 12 files changed, 9 insertions(+), 56 deletions(-) diff --git a/crates/re_arrow_store/src/polars_util.rs b/crates/re_arrow_store/src/polars_util.rs index 4e4beabbb69c..e70aa8541942 100644 --- a/crates/re_arrow_store/src/polars_util.rs +++ b/crates/re_arrow_store/src/polars_util.rs @@ -197,7 +197,7 @@ pub fn range_components<'a, const N: usize>( // --- Joins --- -// TODO(#1619): none of this mess should be here +// TODO(#1759): none of this mess should be here pub fn dataframe_from_cells( cells: &[Option; N], diff --git a/crates/re_arrow_store/src/store_polars.rs b/crates/re_arrow_store/src/store_polars.rs index 00a233c70d6f..73ad746c48c4 100644 --- a/crates/re_arrow_store/src/store_polars.rs +++ b/crates/re_arrow_store/src/store_polars.rs @@ -17,7 +17,7 @@ use crate::{ }; // TODO(#1692): all of this stuff should be defined by Data{Cell,Row,Table}, not the store. 
-// TODO(#1619): remove this and reimplement it on top of store serialization +// TODO(#1759): remove this and reimplement it on top of store serialization // --- diff --git a/crates/re_arrow_store/src/store_write.rs b/crates/re_arrow_store/src/store_write.rs index 677310e5043d..bc2413678027 100644 --- a/crates/re_arrow_store/src/store_write.rs +++ b/crates/re_arrow_store/src/store_write.rs @@ -15,9 +15,8 @@ use crate::{ IndexedTable, PersistentIndexedTable, }; -// TODO(#1619): -// - The store should insert column-per-column rather than row-per-row (purely a performance -// matter) +// TODO(cmc): the store should insert column-per-column rather than row-per-row (purely a +// performance matter). // --- Data store --- diff --git a/crates/re_arrow_store/tests/data_store.rs b/crates/re_arrow_store/tests/data_store.rs index 5ba2a722bf80..b7d62a44d425 100644 --- a/crates/re_arrow_store/tests/data_store.rs +++ b/crates/re_arrow_store/tests/data_store.rs @@ -24,8 +24,6 @@ use re_log_types::{ Timeline, }; -// TODO(#1619): introduce batching in the testing matrix - // --- LatestComponentsAt --- #[test] diff --git a/crates/re_data_store/src/log_db.rs b/crates/re_data_store/src/log_db.rs index 9af248b9c8e0..92dc5f7b86cc 100644 --- a/crates/re_data_store/src/log_db.rs +++ b/crates/re_data_store/src/log_db.rs @@ -61,7 +61,7 @@ impl EntityDb { let mut table = DataTable::from_arrow_msg(msg)?; table.compute_all_size_bytes(); - // TODO(#1619): batch all of this + // TODO(cmc): batch all of this for row in table.to_rows() { self.try_add_data_row(&row)?; } diff --git a/crates/re_log_types/src/arrow_msg.rs b/crates/re_log_types/src/arrow_msg.rs index b83e24266d29..e81b52777e66 100644 --- a/crates/re_log_types/src/arrow_msg.rs +++ b/crates/re_log_types/src/arrow_msg.rs @@ -11,9 +11,6 @@ use arrow2::{array::Array, chunk::Chunk, datatypes::Schema}; #[derive(Clone, Debug, PartialEq)] pub struct ArrowMsg { /// Unique identifier for the [`crate::DataTable`] in this message. - /// - /// NOTE(#1619): While we're in the process of transitioning towards end-to-end batching, the - /// `table_id` is always the same as the `row_id` as the first and only row. pub table_id: TableId, /// The maximum values for all timelines across the entire batch of data. diff --git a/crates/re_log_types/src/data_cell.rs b/crates/re_log_types/src/data_cell.rs index d8b19a9eabc4..7faa284f1d8a 100644 --- a/crates/re_log_types/src/data_cell.rs +++ b/crates/re_log_types/src/data_cell.rs @@ -134,7 +134,7 @@ pub struct DataCellInner { } // TODO(cmc): We should be able to build a cell from non-reference types. -// TODO(#1619): We shouldn't have to specify the component name separately, this should be +// TODO(#1696): We shouldn't have to specify the component name separately, this should be // part of the metadata by using an extension. // TODO(#1696): Check that the array is indeed a leaf / component type when building a cell from an // arrow payload. diff --git a/crates/re_log_types/src/data_row.rs b/crates/re_log_types/src/data_row.rs index 1850c8b913c8..bd780e01188f 100644 --- a/crates/re_log_types/src/data_row.rs +++ b/crates/re_log_types/src/data_row.rs @@ -127,12 +127,6 @@ impl RowId { pub fn random() -> Self { Self(re_tuid::Tuid::random()) } - - /// Temporary utility while we transition to batching. See #1619. 
- #[doc(hidden)] - pub fn into_table_id(self) -> TableId { - TableId(self.0) - } } impl SizeBytes for RowId { @@ -322,13 +316,10 @@ impl DataRow { Self::try_from_cells(row_id, timepoint, entity_path, num_instances, cells).unwrap() } - /// Turns the `DataRow` into a single-row [`DataTable`] that carries the same ID. - /// - /// This only makes sense as part of our transition to batching. See #1619. - #[doc(hidden)] + /// Turns the `DataRow` into a single-row [`DataTable`]. #[inline] pub fn into_table(self) -> DataTable { - DataTable::from_rows(self.row_id.into_table_id(), [self]) + DataTable::from_rows(TableId::random(), [self]) } } diff --git a/crates/re_log_types/src/data_table.rs b/crates/re_log_types/src/data_table.rs index adc9df0e0344..df8a907e6db6 100644 --- a/crates/re_log_types/src/data_table.rs +++ b/crates/re_log_types/src/data_table.rs @@ -156,12 +156,6 @@ impl TableId { pub fn random() -> Self { Self(re_tuid::Tuid::random()) } - - /// Temporary utility while we transition to batching. See #1619. - #[doc(hidden)] - pub fn into_row_id(self) -> RowId { - RowId(self.0) - } } impl SizeBytes for TableId { diff --git a/crates/re_log_types/src/datagen.rs b/crates/re_log_types/src/datagen.rs index 153545bc05d8..3cc2f144dc05 100644 --- a/crates/re_log_types/src/datagen.rs +++ b/crates/re_log_types/src/datagen.rs @@ -1,6 +1,6 @@ //! Generate random data for tests and benchmarks. -// TODO(#1619): It really is time for whole module to disappear. +// TODO(#1810): It really is time for whole module to disappear. use crate::{ component_types::{self, InstanceKey}, diff --git a/crates/re_log_types/src/lib.rs b/crates/re_log_types/src/lib.rs index 1171550e3bf1..578f55ba6616 100644 --- a/crates/re_log_types/src/lib.rs +++ b/crates/re_log_types/src/lib.rs @@ -195,18 +195,6 @@ pub enum LogMsg { } impl LogMsg { - pub fn id(&self) -> RowId { - match self { - Self::BeginRecordingMsg(msg) => msg.row_id, - Self::EntityPathOpMsg(_, msg) => msg.row_id, - Self::Goodbye(row_id) => *row_id, - // TODO(#1619): the following only makes sense because, while we support sending and - // receiving batches, we don't actually do so yet. - // We need to stop storing raw `LogMsg`s before we can benefit from our batching. - Self::ArrowMsg(_, msg) => msg.table_id.into_row_id(), - } - } - pub fn recording_id(&self) -> Option<&RecordingId> { match self { Self::BeginRecordingMsg(msg) => Some(&msg.info.recording_id), diff --git a/crates/re_sdk/src/msg_sender.rs b/crates/re_sdk/src/msg_sender.rs index 2269b57d1fc6..1a9b076d97d9 100644 --- a/crates/re_sdk/src/msg_sender.rs +++ b/crates/re_sdk/src/msg_sender.rs @@ -7,13 +7,8 @@ use crate::{ Component, EntityPath, RecordingStream, SerializableComponent, }; -// TODO(#1619): Rust SDK batching - // --- -// TODO: effectively this is nothing but a rowbuilder? except sometimes it creates table due to -// limitations.. - /// Errors that can occur when constructing or sending messages /// using [`MsgSender`]. #[derive(thiserror::Error, Debug)] @@ -50,12 +45,7 @@ pub enum MsgSenderError { /// .map_err(Into::into) /// } /// ``` -// TODO(#1619): this whole thing needs to be rethought to incorporate batching and datatables. pub struct MsgSender { - // TODO - // TODO(cmc): At the moment, a `MsgBundle` can only contain data for a single entity, so - // this must be known as soon as we spawn the builder. - // This won't be true anymore once batch insertions land. entity_path: EntityPath, /// All the different timestamps for this message. 
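// An illustrative sketch of the row-builder flow the docs below describe (illustrative
// only; assumes `points` and `colors` are two different serializable component batches
// and `rec_stream` is a `RecordingStream`):
//
//     MsgSender::new("some/entity")
//         .with_component(&points)?   // one component type per call...
//         .with_component(&colors)?   // ...a second, different type is fine
//         .send(&rec_stream)?;        // adding `&points` twice would error on send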
@@ -167,8 +157,6 @@ impl MsgSender {
     /// The SDK does not yet support batch insertions, which are semantically identical to adding
     /// the same component type multiple times in a single message.
     /// Doing so will return an error when trying to `send()` the message.
-    //
-    // TODO(#589): batch insertions
     pub fn with_component<'a, C: SerializableComponent>(
         mut self,
         data: impl IntoIterator<Item = &'a C>,
@@ -201,8 +189,6 @@ impl MsgSender {
     /// The SDK does not yet support batch insertions, which are semantically identical to adding
     /// the same component type multiple times in a single message.
     /// Doing so will return an error when trying to `send()` the message.
-    //
-    // TODO(#589): batch insertions
    pub fn with_splat<C: SerializableComponent>(mut self, data: C) -> Result<Self, MsgSenderError> {
         if C::name() == InstanceKey::name() {
             return Err(MsgSenderError::SplattedInstanceKeys);
From 649bbe8128e746266a757443bed7aec3456853c0 Mon Sep 17 00:00:00 2001
From: Clement Rey
Date: Fri, 28 Apr 2023 08:48:49 +0200
Subject: [PATCH 14/31] self-review

---
 crates/re_sdk/src/msg_sender.rs       |  1 -
 crates/re_sdk/src/recording_stream.rs | 37 ++++++++++++---------------
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/crates/re_sdk/src/msg_sender.rs b/crates/re_sdk/src/msg_sender.rs
index 1a9b076d97d9..438fd7a54764 100644
--- a/crates/re_sdk/src/msg_sender.rs
+++ b/crates/re_sdk/src/msg_sender.rs
@@ -218,7 +218,6 @@ impl MsgSender {
 
     /// Consumes, packs, sanity checks and finally sends the message to the currently configured
     /// target of the SDK.
-    // TODO: wtf is this a DataTableError?
     pub fn send(self, rec_stream: &RecordingStream) -> Result<(), DataTableError> {
         if !rec_stream.is_enabled() {
             return Ok(()); // silently drop the message
diff --git a/crates/re_sdk/src/recording_stream.rs b/crates/re_sdk/src/recording_stream.rs
index 3c156ebb5501..dd7cdac7786c 100644
--- a/crates/re_sdk/src/recording_stream.rs
+++ b/crates/re_sdk/src/recording_stream.rs
@@ -10,10 +10,10 @@ use crate::sink::{LogSink, MemorySinkStorage};
 
 // ---
 
-/// Errors that can occur when creating/manipulating a [`DataTableBatcher`].
+/// Errors that can occur when creating/manipulating a [`RecordingStream`].
 #[derive(thiserror::Error, Debug)]
 pub enum RecordingStreamError {
-    /// Error spawning one of the background threads.
+    /// Error within the underlying table batcher.
#[error("Failed to spawn the underlying batcher: {0}")] DataTableBatcher(#[from] DataTableBatcherError), @@ -138,7 +138,8 @@ impl RecordingStreamBuilder { /// ## Example /// /// ```no_run - /// let rec_stream = re_sdk::RecordingStreamBuilder::new("my_app").buffered(); + /// let rec_stream = re_sdk::RecordingStreamBuilder::new("my_app").buffered()?; + /// # Ok::<(), Box>(()) /// ``` pub fn buffered(self) -> RecordingStreamResult { let (enabled, recording_info, batcher_config) = self.finalize(); @@ -160,7 +161,8 @@ impl RecordingStreamBuilder { /// ## Example /// /// ```no_run - /// let (rec_stream, storage) = re_sdk::RecordingStreamBuilder::new("my_app").memory().unwrap(); + /// let (rec_stream, storage) = re_sdk::RecordingStreamBuilder::new("my_app").memory()?; + /// # Ok::<(), Box>(()) /// ``` pub fn memory( self, @@ -173,7 +175,7 @@ impl RecordingStreamBuilder { RecordingStream::new(recording_info, batcher_config, Box::new(sink)) .map(|rec_stream| (rec_stream, storage)) } else { - re_log::debug!("Rerun disabled - call to memory_recording() ignored"); + re_log::debug!("Rerun disabled - call to memory() ignored"); Ok((RecordingStream::disabled(), Default::default())) } } @@ -185,7 +187,8 @@ impl RecordingStreamBuilder { /// /// ```no_run /// let rec_stream = re_sdk::RecordingStreamBuilder::new("my_app") - /// .connect(re_sdk::default_server_addr()); + /// .connect(re_sdk::default_server_addr())?; + /// # Ok::<(), Box>(()) /// ``` pub fn connect(self, addr: std::net::SocketAddr) -> RecordingStreamResult { let (enabled, recording_info, batcher_config) = self.finalize(); @@ -229,7 +232,8 @@ impl RecordingStreamBuilder { } } - /// Returns whether or not logging is enabled, plus a [`RecordingInfo`]. + /// Returns whether or not logging is enabled, a [`RecordingInfo`] and the associated batcher + /// configuration. /// /// This can be used to then construct a [`RecordingStream`] manually using /// [`RecordingStream::new`]. @@ -277,11 +281,13 @@ impl RecordingStreamBuilder { /// /// Data is logged into Rerun via [`LogSink`]s. /// -/// The underlying [`LogSink`] for a [`RecordingStream`] can be changed at any point during its +/// The underlying [`LogSink`] of a [`RecordingStream`] can be changed at any point during its /// lifetime by calling [`RecordingStream::set_sink`] or one of the higher level helpers -/// ([`RecordingStream::connect`], [`RecordingStream::memory_recording`], +/// ([`RecordingStream::connect`], [`RecordingStream::memory`], /// [`RecordingStream::save`], [`RecordingStream::disconnect`]). /// +/// See [`RecordingStream::set_sink`] for more information. +/// /// ## Multithreading and ordering /// /// [`RecordingStream`] can be cheaply cloned and used freely across any number of threads. @@ -409,7 +415,7 @@ impl RecordingStream { /// You can find sinks in [`crate::sink`]. /// /// See also: [`RecordingStreamBuilder`]. - #[must_use = "Recording will get closed automatically when this object is dropped"] + #[must_use = "Recording will get closed automatically once all instances of this object have been dropped"] pub fn new( info: RecordingInfo, batcher_config: DataTableBatcherConfig, @@ -665,11 +671,6 @@ impl RecordingStream { // 2. Receive pending tables from the batcher's channel this.cmds_tx.send(Command::PopPendingTables).ok(); - - // 3. Wait for all tables to have been forwarded - let (cmd, oneshot) = Command::flush(); - this.cmds_tx.send(cmd).ok(); - oneshot.recv().ok(); } /// Initiates a flush the batching pipeline and waits for it to propagate. 
@@ -684,11 +685,7 @@ impl RecordingStream {
         // NOTE: Internal channels can never be closed outside of the `Drop` impl, all these sends
         // are safe.
 
-        // 1. Flush the batcher down the table channel
-        this.batcher.flush_blocking();
-
-        // 2. Receive pending tables from the batcher's channel
-        this.cmds_tx.send(Command::PopPendingTables).ok();
+        self.flush_async();
 
         // 3. Wait for all tables to have been forwarded
         let (cmd, oneshot) = Command::flush();
From 2b74c3bf10f5b15fa24e6926bad3b9744e86460d Mon Sep 17 00:00:00 2001
From: Clement Rey
Date: Fri, 28 Apr 2023 09:51:14 +0200
Subject: [PATCH 15/31] ordered data columns in data tables

---
 crates/re_arrow_store/src/store_dump.rs | 10 +++++-----
 crates/re_log_types/src/data_table.rs   |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/crates/re_arrow_store/src/store_dump.rs b/crates/re_arrow_store/src/store_dump.rs
index a3255657e61b..e83690a478cf 100644
--- a/crates/re_arrow_store/src/store_dump.rs
+++ b/crates/re_arrow_store/src/store_dump.rs
@@ -1,6 +1,6 @@
-use ahash::HashMapExt;
+use std::collections::BTreeMap;
+
 use arrow2::Either;
-use nohash_hasher::IntMap;
 use re_log_types::{
     DataCellColumn, DataTable, ErasedTimeVec, RowIdVec, TableId, TimeRange, Timeline,
 };
@@ -51,7 +51,7 @@ impl DataStore {
                     .take(table.num_rows() as _)
                     .collect(),
                 col_num_instances: col_num_instances.clone(),
-                columns: columns.clone(), // shallow
+                columns: columns.clone().into_iter().collect(), // shallow
             }
         })
     }
@@ -92,7 +92,7 @@ impl DataStore {
                         .take(col_row_id.len())
                         .collect(),
                     col_num_instances: col_num_instances.clone(),
-                    columns: columns.clone(), // shallow
+                    columns: columns.clone().into_iter().collect(), // shallow
                 }
             })
         })
@@ -163,7 +163,7 @@ impl DataStore {
         let col_num_instances =
             filter_column(col_time, col_num_instances.iter(), time_filter).collect();
 
-        let mut columns2 = IntMap::with_capacity(columns.len());
+        let mut columns2 = BTreeMap::default();
         for (component, column) in columns {
             let column = filter_column(col_time, column.iter(), time_filter).collect();
             columns2.insert(*component, DataCellColumn(column));
diff --git a/crates/re_log_types/src/data_table.rs b/crates/re_log_types/src/data_table.rs
index df8a907e6db6..aa5cb80d29ac 100644
--- a/crates/re_log_types/src/data_table.rs
+++ b/crates/re_log_types/src/data_table.rs
@@ -2,7 +2,7 @@ use std::collections::BTreeMap;
 
 use ahash::HashMap;
 use itertools::Itertools as _;
-use nohash_hasher::{IntMap, IntSet};
+use nohash_hasher::IntSet;
 use smallvec::SmallVec;
 
 use crate::{
@@ -347,7 +347,7 @@ pub struct DataTable {
     ///
     /// The cells are optional since not all rows will have data for every single component
     /// (i.e. the table is sparse).
-    pub columns: IntMap<ComponentName, DataCellColumn>,
+    pub columns: BTreeMap<ComponentName, DataCellColumn>,
 }
 
 impl DataTable {
@@ -418,7 +418,7 @@ impl DataTable {
         }
 
         // Pre-allocate all columns (one per component).
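         // NOTE: A `BTreeMap` keyed by component name keeps the columns sorted, so the
         // column order of a table is deterministic and reproducible across runs.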
- let mut columns = IntMap::default(); + let mut columns = BTreeMap::default(); for component in components { columns.insert( component, From 34be0a78b6c16613bde59dacaf9d05c791847c8c Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 28 Apr 2023 09:59:43 +0200 Subject: [PATCH 16/31] tests --- Cargo.lock | 1 + crates/re_sdk/Cargo.toml | 1 + crates/re_sdk/src/recording_stream.rs | 160 +++++++++++++++++++++++++- 3 files changed, 158 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index af835da7e69e..5bb356d4b3e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4084,6 +4084,7 @@ dependencies = [ "re_log_types", "re_memory", "re_sdk_comms", + "similar-asserts", "thiserror", ] diff --git a/crates/re_sdk/Cargo.toml b/crates/re_sdk/Cargo.toml index 635654865e0c..47c3806177b5 100644 --- a/crates/re_sdk/Cargo.toml +++ b/crates/re_sdk/Cargo.toml @@ -53,6 +53,7 @@ arrow2_convert.workspace = true ndarray.workspace = true ndarray-rand = "0.14" rand = "0.8" +similar-asserts = "1.4.2" [build-dependencies] diff --git a/crates/re_sdk/src/recording_stream.rs b/crates/re_sdk/src/recording_stream.rs index dd7cdac7786c..f134afb04758 100644 --- a/crates/re_sdk/src/recording_stream.rs +++ b/crates/re_sdk/src/recording_stream.rs @@ -745,8 +745,160 @@ impl RecordingStream { // TODO: some regression tests and we're all good // TODO: an example would probably be nice too -#[test] -fn recording_stream_impl_send_sync() { - fn assert_send_sync() {} - assert_send_sync::(); +#[cfg(test)] +mod tests { + use re_log_types::RowId; + + use super::*; + + #[test] + fn impl_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + } + + #[test] + fn never_flush() { + let rec_stream = RecordingStreamBuilder::new("never_flush") + .enabled(true) + .batcher_config(DataTableBatcherConfig::NEVER) + .buffered() + .unwrap(); + + let rec_info = rec_stream.recording_info().cloned().unwrap(); + + let mut table = DataTable::example(false); + table.compute_all_size_bytes(); + for row in table.to_rows() { + rec_stream.record_row(row); + } + + let storage = rec_stream.memory(); + let mut msgs = { + let mut msgs = storage.take(); + msgs.reverse(); + msgs + }; + + // First message should be a set_recording_info resulting from the original sink swap to + // buffered mode. + match msgs.pop().unwrap() { + LogMsg::BeginRecordingMsg(msg) => { + assert!(msg.row_id != RowId::ZERO); + similar_asserts::assert_eq!(rec_info, msg.info); + } + _ => panic!("expected BeginRecordingMsg"), + } + + // Second message should be a set_recording_info resulting from the later sink swap from + // buffered mode into in-memory mode. + // This arrives _before_ the data itself since we're using manual flushing. + match msgs.pop().unwrap() { + LogMsg::BeginRecordingMsg(msg) => { + assert!(msg.row_id != RowId::ZERO); + similar_asserts::assert_eq!(rec_info, msg.info); + } + _ => panic!("expected BeginRecordingMsg"), + } + + // Third message is the batched table itself, which was sent as a result of the implicit + // flush when swapping the underlying sink from buffered to in-memory. + match msgs.pop().unwrap() { + LogMsg::ArrowMsg(rid, msg) => { + assert_eq!(rec_info.recording_id, rid); + + let mut got = DataTable::from_arrow_msg(&msg).unwrap(); + // TODO(1760): we shouldn't have to (re)do this! + got.compute_all_size_bytes(); + // NOTE: Override the resulting table's ID so they can be compared. + got.table_id = table.table_id; + + similar_asserts::assert_eq!(table, got); + } + _ => panic!("expected BeginRecordingMsg"), + } + + // That's all. 
+ assert!(msgs.pop().is_none()); + } + + #[test] + fn always_flush() { + let rec_stream = RecordingStreamBuilder::new("always_flush") + .enabled(true) + .batcher_config(DataTableBatcherConfig::ALWAYS) + .buffered() + .unwrap(); + + let rec_info = rec_stream.recording_info().cloned().unwrap(); + + let mut table = DataTable::example(false); + table.compute_all_size_bytes(); + for row in table.to_rows() { + rec_stream.record_row(row); + } + + let storage = rec_stream.memory(); + let mut msgs = { + let mut msgs = storage.take(); + msgs.reverse(); + msgs + }; + + // First message should be a set_recording_info resulting from the original sink swap to + // buffered mode. + match msgs.pop().unwrap() { + LogMsg::BeginRecordingMsg(msg) => { + assert!(msg.row_id != RowId::ZERO); + similar_asserts::assert_eq!(rec_info, msg.info); + } + _ => panic!("expected BeginRecordingMsg"), + } + + // Second message should be a set_recording_info resulting from the later sink swap from + // buffered mode into in-memory mode. + // This arrives _before_ the data itself since we're using manual flushing. + match msgs.pop().unwrap() { + LogMsg::BeginRecordingMsg(msg) => { + assert!(msg.row_id != RowId::ZERO); + similar_asserts::assert_eq!(rec_info, msg.info); + } + _ => panic!("expected BeginRecordingMsg"), + } + + let mut rows = { + let mut rows: Vec<_> = table.to_rows().collect(); + rows.reverse(); + rows + }; + + let mut assert_next_row = || { + match msgs.pop().unwrap() { + LogMsg::ArrowMsg(rid, msg) => { + assert_eq!(rec_info.recording_id, rid); + + let mut got = DataTable::from_arrow_msg(&msg).unwrap(); + // TODO(1760): we shouldn't have to (re)do this! + got.compute_all_size_bytes(); + // NOTE: Override the resulting table's ID so they can be compared. + got.table_id = table.table_id; + + let expected = DataTable::from_rows(got.table_id, [rows.pop().unwrap()]); + + similar_asserts::assert_eq!(expected, got); + } + _ => panic!("expected BeginRecordingMsg"), + } + }; + + // 3rd, 4th and 5th messages are all the single-row batched tables themselves, which were + // sent as a result of the implicit flush when swapping the underlying sink from buffered + // to in-memory. + assert_next_row(); + assert_next_row(); + assert_next_row(); + + // That's all. + assert!(msgs.pop().is_none()); + } } From 72685fa15d79e7c591b8ff7cd322c3f547e6083d Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 28 Apr 2023 10:13:40 +0200 Subject: [PATCH 17/31] even more tests --- crates/re_sdk/src/recording_stream.rs | 101 ++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 5 deletions(-) diff --git a/crates/re_sdk/src/recording_stream.rs b/crates/re_sdk/src/recording_stream.rs index f134afb04758..e93fee4c46af 100644 --- a/crates/re_sdk/src/recording_stream.rs +++ b/crates/re_sdk/src/recording_stream.rs @@ -742,9 +742,6 @@ impl RecordingStream { } } -// TODO: some regression tests and we're all good -// TODO: an example would probably be nice too - #[cfg(test)] mod tests { use re_log_types::RowId; @@ -815,7 +812,7 @@ mod tests { similar_asserts::assert_eq!(table, got); } - _ => panic!("expected BeginRecordingMsg"), + _ => panic!("expected ArrowMsg"), } // That's all. @@ -887,7 +884,7 @@ mod tests { similar_asserts::assert_eq!(expected, got); } - _ => panic!("expected BeginRecordingMsg"), + _ => panic!("expected ArrowMsg"), } }; @@ -901,4 +898,98 @@ mod tests { // That's all. 
assert!(msgs.pop().is_none()); } + + #[test] + fn flush_hierarchy() { + let (rec_stream, storage) = RecordingStreamBuilder::new("flush_hierarchy") + .enabled(true) + .batcher_config(DataTableBatcherConfig::NEVER) + .memory() + .unwrap(); + + let rec_info = rec_stream.recording_info().cloned().unwrap(); + + let mut table = DataTable::example(false); + table.compute_all_size_bytes(); + for row in table.to_rows() { + rec_stream.record_row(row); + } + + { + let mut msgs = { + let mut msgs = storage.take(); + msgs.reverse(); + msgs + }; + + // First message should be a set_recording_info resulting from the original sink swap + // to in-memory mode. + match msgs.pop().unwrap() { + LogMsg::BeginRecordingMsg(msg) => { + assert!(msg.row_id != RowId::ZERO); + similar_asserts::assert_eq!(rec_info, msg.info); + } + _ => panic!("expected BeginRecordingMsg"), + } + + // The underlying batcher is never flushing: there's nothing else. + assert!(msgs.pop().is_none()); + } + + // The underlying batcher is never flushing: there's nothing else. + assert!(storage.take().is_empty()); + + rec_stream.flush_blocking(); // flush the entire hierarchy + + { + let mut msgs = { + let mut msgs = storage.take(); + msgs.reverse(); + msgs + }; + + // The batched table itself, which was sent as a result of the explicit flush above. + match msgs.pop().unwrap() { + LogMsg::ArrowMsg(rid, msg) => { + assert_eq!(rec_info.recording_id, rid); + + let mut got = DataTable::from_arrow_msg(&msg).unwrap(); + // TODO(1760): we shouldn't have to (re)do this! + got.compute_all_size_bytes(); + // NOTE: Override the resulting table's ID so they can be compared. + got.table_id = table.table_id; + + similar_asserts::assert_eq!(table, got); + } + _ => panic!("expected ArrowMsg"), + } + + // That's all. + assert!(msgs.pop().is_none()); + } + } + + #[test] + fn disabled() { + let (rec_stream, storage) = RecordingStreamBuilder::new("disabled") + .enabled(false) + .batcher_config(DataTableBatcherConfig::ALWAYS) + .memory() + .unwrap(); + + let mut table = DataTable::example(false); + table.compute_all_size_bytes(); + for row in table.to_rows() { + rec_stream.record_row(row); + } + + let mut msgs = { + let mut msgs = storage.take(); + msgs.reverse(); + msgs + }; + + // That's all. + assert!(msgs.pop().is_none()); + } } From 067168f4bd7e6c8db8bfd49bfe80b768a6d5d343 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 28 Apr 2023 10:40:37 +0200 Subject: [PATCH 18/31] rogue todo --- crates/re_sdk/src/recording_stream.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/re_sdk/src/recording_stream.rs b/crates/re_sdk/src/recording_stream.rs index e93fee4c46af..922722d63d1f 100644 --- a/crates/re_sdk/src/recording_stream.rs +++ b/crates/re_sdk/src/recording_stream.rs @@ -13,6 +13,10 @@ use crate::sink::{LogSink, MemorySinkStorage}; /// Errors that can occur when creating/manipulating a [`RecordingStream`]. #[derive(thiserror::Error, Debug)] pub enum RecordingStreamError { + /// Error within the underlying file sink. + #[error("Failed to create the underlying file sink: {0}")] + FileSink(#[from] re_log_encoding::FileSinkError), + /// Error within the underlying table batcher. 
#[error("Failed to spawn the underlying batcher: {0}")] DataTableBatcher(#[from] DataTableBatcherError), @@ -224,7 +228,7 @@ impl RecordingStreamBuilder { RecordingStream::new( recording_info, batcher_config, - Box::new(crate::sink::FileSink::new(path).unwrap()), // TODO + Box::new(crate::sink::FileSink::new(path)?), ) } else { re_log::debug!("Rerun disabled - call to save() ignored"); From b8e00655216e7bbdb6daf5659a065083a5b191ab Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 28 Apr 2023 12:17:06 +0200 Subject: [PATCH 19/31] batching is now a reality --- crates/re_log_types/src/data_table.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/crates/re_log_types/src/data_table.rs b/crates/re_log_types/src/data_table.rs index aa5cb80d29ac..f8d6780c5c44 100644 --- a/crates/re_log_types/src/data_table.rs +++ b/crates/re_log_types/src/data_table.rs @@ -435,12 +435,6 @@ impl DataTable { } } - if col_row_id.len() > 1 { - re_log::warn_once!( - "batching features are not ready for use, use single-row data tables instead!" - ); - } - Self { table_id, col_row_id, From 0e69707170f7c4e582ff87e025fd6c9f4dafbaa8 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 28 Apr 2023 12:18:08 +0200 Subject: [PATCH 20/31] some extra peace of mind --- crates/re_log_types/src/data_cell.rs | 3 ++ crates/re_log_types/src/data_table.rs | 40 +++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/crates/re_log_types/src/data_cell.rs b/crates/re_log_types/src/data_cell.rs index 7faa284f1d8a..56f39a55863c 100644 --- a/crates/re_log_types/src/data_cell.rs +++ b/crates/re_log_types/src/data_cell.rs @@ -533,6 +533,9 @@ impl DataCell { inner.compute_size_bytes(); return true; } + + re_log::error_once!("cell size could _not_ be computed"); + false } } diff --git a/crates/re_log_types/src/data_table.rs b/crates/re_log_types/src/data_table.rs index f8d6780c5c44..9efe5228dc66 100644 --- a/crates/re_log_types/src/data_table.rs +++ b/crates/re_log_types/src/data_table.rs @@ -1118,3 +1118,43 @@ impl DataTable { table } } + +#[test] +fn data_table_sizes() { + use crate::{component_types::InstanceKey, Component as _}; + use arrow2::array::UInt64Array; + + { + let mut cell = DataCell::from_arrow( + InstanceKey::name(), + UInt64Array::from_vec(vec![1, 2, 3]).boxed(), + ); + cell.compute_size_bytes(); + + let row = DataRow::from_cells1(RowId::random(), "a/b/c", TimePoint::default(), 3, cell); + + let table = DataTable::from_rows(TableId::random(), [row]); + assert_eq!(244, table.heap_size_bytes()); + + let mut table = DataTable::from_arrow_msg(&table.to_arrow_msg().unwrap()).unwrap(); + table.compute_all_size_bytes(); + assert_eq!(244, table.heap_size_bytes()); + } + + { + let mut cell = DataCell::from_arrow( + InstanceKey::name(), + UInt64Array::from_vec(vec![1, 2, 3]).boxed(), + ); + cell.compute_size_bytes(); + + let row = DataRow::from_cells1(RowId::random(), "a/b/c", TimePoint::default(), 3, cell); + + let table = DataTable::from_rows(TableId::random(), [row.clone(), row]); + assert_eq!(424, table.heap_size_bytes()); + + let mut table = DataTable::from_arrow_msg(&table.to_arrow_msg().unwrap()).unwrap(); + table.compute_all_size_bytes(); + assert_eq!(424, table.heap_size_bytes()); + } +} From a7f84c87f7e8817d9ca74db14e2db977565a2827 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 28 Apr 2023 17:55:27 +0200 Subject: [PATCH 21/31] revert --- crates/re_log_types/src/data_table.rs | 40 --------------------------- 1 file changed, 40 deletions(-) diff --git a/crates/re_log_types/src/data_table.rs 
b/crates/re_log_types/src/data_table.rs index 9efe5228dc66..f8d6780c5c44 100644 --- a/crates/re_log_types/src/data_table.rs +++ b/crates/re_log_types/src/data_table.rs @@ -1118,43 +1118,3 @@ impl DataTable { table } } - -#[test] -fn data_table_sizes() { - use crate::{component_types::InstanceKey, Component as _}; - use arrow2::array::UInt64Array; - - { - let mut cell = DataCell::from_arrow( - InstanceKey::name(), - UInt64Array::from_vec(vec![1, 2, 3]).boxed(), - ); - cell.compute_size_bytes(); - - let row = DataRow::from_cells1(RowId::random(), "a/b/c", TimePoint::default(), 3, cell); - - let table = DataTable::from_rows(TableId::random(), [row]); - assert_eq!(244, table.heap_size_bytes()); - - let mut table = DataTable::from_arrow_msg(&table.to_arrow_msg().unwrap()).unwrap(); - table.compute_all_size_bytes(); - assert_eq!(244, table.heap_size_bytes()); - } - - { - let mut cell = DataCell::from_arrow( - InstanceKey::name(), - UInt64Array::from_vec(vec![1, 2, 3]).boxed(), - ); - cell.compute_size_bytes(); - - let row = DataRow::from_cells1(RowId::random(), "a/b/c", TimePoint::default(), 3, cell); - - let table = DataTable::from_rows(TableId::random(), [row.clone(), row]); - assert_eq!(424, table.heap_size_bytes()); - - let mut table = DataTable::from_arrow_msg(&table.to_arrow_msg().unwrap()).unwrap(); - table.compute_all_size_bytes(); - assert_eq!(424, table.heap_size_bytes()); - } -} From 6e348db7426de6caa3d835f8e3efec6e195c6169 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 28 Apr 2023 17:59:12 +0200 Subject: [PATCH 22/31] lock shenanigans --- Cargo.lock | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5bb356d4b3e9..55e8ec39fdd5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -763,16 +763,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "clock" -version = "0.6.0-alpha.0" -dependencies = [ - "anyhow", - "clap 4.1.4", - "glam", - "rerun", -] - [[package]] name = "cocoa" version = "0.24.1" From 4af33427dbca63f595447cabcbf72df2f0cd492b Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 3 May 2023 22:29:43 +0200 Subject: [PATCH 23/31] merge shenanigans --- crates/re_log_types/src/data_row.rs | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/crates/re_log_types/src/data_row.rs b/crates/re_log_types/src/data_row.rs index f773d86c2686..bd780e01188f 100644 --- a/crates/re_log_types/src/data_row.rs +++ b/crates/re_log_types/src/data_row.rs @@ -341,24 +341,6 @@ impl SizeBytes for DataRow { } } -impl SizeBytes for DataRow { - fn heap_size_bytes(&self) -> u64 { - let Self { - row_id, - timepoint, - entity_path, - num_instances, - cells, - } = self; - - row_id.heap_size_bytes() - + timepoint.heap_size_bytes() - + entity_path.heap_size_bytes() - + num_instances.heap_size_bytes() - + cells.heap_size_bytes() - } -} - impl DataRow { #[inline] pub fn row_id(&self) -> RowId { From d1e5c19d52e818236bfbae04ed7fe85a20929ce6 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 3 May 2023 23:09:20 +0200 Subject: [PATCH 24/31] address PR comments --- crates/re_sdk/src/lib.rs | 3 +++ crates/re_sdk/src/log_sink.rs | 9 +++++-- crates/re_sdk/src/recording_stream.rs | 35 +++++++++++++++++++-------- crates/rerun/src/clap.rs | 4 ++- crates/rerun/src/native_viewer.rs | 3 +++ crates/rerun/src/web_viewer.rs | 3 +++ 6 files changed, 44 insertions(+), 13 deletions(-) diff --git a/crates/re_sdk/src/lib.rs b/crates/re_sdk/src/lib.rs index ac5014fab40b..fcf8e5c31f6c 100644 --- a/crates/re_sdk/src/lib.rs +++ b/crates/re_sdk/src/lib.rs @@ -30,6 +30,9 @@ impl 
crate::sink::LogSink for re_log_encoding::FileSink {
     fn send(&self, msg: re_log_types::LogMsg) {
         re_log_encoding::FileSink::send(self, msg);
     }
+
+    #[inline]
+    fn flush_blocking(&self) {}
 }
 
 // ---------------
diff --git a/crates/re_sdk/src/log_sink.rs b/crates/re_sdk/src/log_sink.rs
index 7a1d9b751813..5719e58090cc 100644
--- a/crates/re_sdk/src/log_sink.rs
+++ b/crates/re_sdk/src/log_sink.rs
@@ -27,8 +27,7 @@ pub trait LogSink: Send + Sync + 'static {
     /// Blocks until all pending data in the sink's send buffers has been fully flushed.
     ///
     /// See also [`LogSink::drop_if_disconnected`].
-    #[inline]
-    fn flush_blocking(&self) {}
+    fn flush_blocking(&self);
 
     /// Drops all pending data currently sitting in the sink's send buffers if it is unable to
     /// flush it for any reason (e.g. a broken TCP connection for a [`TcpSink`]).
@@ -65,6 +64,9 @@ impl LogSink for BufferedSink {
     fn drain_backlog(&self) -> Vec<LogMsg> {
         std::mem::take(&mut self.0.lock())
     }
+
+    #[inline]
+    fn flush_blocking(&self) {}
 }
 
 /// Store log messages directly in memory.
@@ -95,6 +97,9 @@ impl LogSink for MemorySink {
     fn send_all(&self, mut messages: Vec<LogMsg>) {
         self.0.write().append(&mut messages);
     }
+
+    #[inline]
+    fn flush_blocking(&self) {}
 }
 
 /// The storage used by [`MemorySink`].
diff --git a/crates/re_sdk/src/recording_stream.rs b/crates/re_sdk/src/recording_stream.rs
index 922722d63d1f..ea656e9d6c78 100644
--- a/crates/re_sdk/src/recording_stream.rs
+++ b/crates/re_sdk/src/recording_stream.rs
@@ -146,7 +146,7 @@ impl RecordingStreamBuilder {
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn buffered(self) -> RecordingStreamResult<RecordingStream> {
-        let (enabled, recording_info, batcher_config) = self.finalize();
+        let (enabled, recording_info, batcher_config) = self.into_args();
         if enabled {
             RecordingStream::new(
                 recording_info,
@@ -174,7 +174,7 @@ impl RecordingStreamBuilder {
         let sink = crate::log_sink::MemorySink::default();
         let storage = sink.buffer();
 
-        let (enabled, recording_info, batcher_config) = self.finalize();
+        let (enabled, recording_info, batcher_config) = self.into_args();
         if enabled {
             RecordingStream::new(recording_info, batcher_config, Box::new(sink))
                 .map(|rec_stream| (rec_stream, storage))
@@ -195,7 +195,7 @@ impl RecordingStreamBuilder {
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn connect(self, addr: std::net::SocketAddr) -> RecordingStreamResult<RecordingStream> {
-        let (enabled, recording_info, batcher_config) = self.finalize();
+        let (enabled, recording_info, batcher_config) = self.into_args();
         if enabled {
             RecordingStream::new(
                 recording_info,
@@ -222,7 +222,7 @@ impl RecordingStreamBuilder {
         self,
         path: impl Into<std::path::PathBuf>,
     ) -> RecordingStreamResult<RecordingStream> {
-        let (enabled, recording_info, batcher_config) = self.finalize();
+        let (enabled, recording_info, batcher_config) = self.into_args();
 
         if enabled {
             RecordingStream::new(
@@ -241,7 +241,7 @@ impl RecordingStreamBuilder {
     ///
     /// This can be used to then construct a [`RecordingStream`] manually using
     /// [`RecordingStream::new`].
-    pub fn finalize(self) -> (bool, RecordingInfo, DataTableBatcherConfig) {
+    pub fn into_args(self) -> (bool, RecordingInfo, DataTableBatcherConfig) {
         let Self {
             application_id,
             recording_id,
@@ -448,6 +448,8 @@ fn forwarding_thread(
     cmds_rx: Receiver<Command>,
     tables: Receiver<DataTable>,
 ) {
+    /// Returns `true` to indicate that processing can continue; i.e. `false` means immediate
+    /// shutdown.
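+    ///
+    /// Note that a `Flush` command carries a oneshot channel: dropping its sender is what
+    /// signals the caller blocked in `flush_blocking`.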
fn handle_cmd(info: &RecordingInfo, cmd: Command, sink: &mut Box<dyn LogSink>) -> bool {
         match cmd {
             Command::RecordMsg(msg) => {
@@ -491,7 +493,8 @@
                 drop(oneshot); // signals the oneshot
             }
             Command::PopPendingTables => {
-                // No need to do anything, just skip the current iteration.
+                // Wake up and skip the current iteration so that we can drain all pending tables
+                // before handling the next command.
             }
             Command::Shutdown => return false,
         }
@@ -670,11 +673,19 @@ impl RecordingStream {
         // NOTE: Internal channels can never be closed outside of the `Drop` impl, all these sends
         // are safe.
 
-        // 1. Flush the batcher down the table channel
+        // 1. Synchronously flush the batcher down the table channel
+        //
+        // NOTE: This _has_ to be done synchronously as we need to be guaranteed that all tables
+        // are ready to be drained by the time this call returns.
+        // It cannot block indefinitely and is fairly fast as it only requires compute (no I/O).
         this.batcher.flush_blocking();
 
-        // 2. Receive pending tables from the batcher's channel
+        // 2. Drain all pending tables from the batcher's channel _before_ any other future command
         this.cmds_tx.send(Command::PopPendingTables).ok();
+
+        // 3. Asynchronously flush everything down the sink
+        let (cmd, _) = Command::flush();
+        this.cmds_tx.send(cmd).ok();
     }
 
     /// Initiates a flush of the batching pipeline and waits for it to propagate.
@@ -689,9 +700,13 @@ impl RecordingStream {
         // NOTE: Internal channels can never be closed outside of the `Drop` impl, all these sends
         // are safe.
 
-        self.flush_async();
+        // 1. Flush the batcher down the table channel
+        this.batcher.flush_blocking();
+
+        // 2. Drain all pending tables from the batcher's channel _before_ any other future command
+        this.cmds_tx.send(Command::PopPendingTables).ok();
 
-        // 3. Wait for all tables to have been forwarded
+        // 3. Wait for all tables to have been forwarded down the sink
         let (cmd, oneshot) = Command::flush();
         this.cmds_tx.send(cmd).ok();
         oneshot.recv().ok();
diff --git a/crates/rerun/src/clap.rs b/crates/rerun/src/clap.rs
index 6664a8f091c6..ea883f0a59a4 100644
--- a/crates/rerun/src/clap.rs
+++ b/crates/rerun/src/clap.rs
@@ -94,7 +94,7 @@ impl RerunArgs {
         let (rerun_enabled, recording_info, batcher_config) =
             crate::RecordingStreamBuilder::new(application_id)
                 .default_enabled(default_enabled)
-                .finalize();
+                .into_args();
 
         if !rerun_enabled {
             run(RecordingStream::disabled());
@@ -126,6 +126,8 @@ impl RerunArgs {
         let rec_stream = RecordingStream::new(recording_info, batcher_config, sink)?;
         run(rec_stream.clone());
 
+        // The user callback is done executing, it's a good opportunity to flush the pipeline
+        // independently of the current flush thresholds (which might be `NEVER`).
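+        // A non-blocking flush is enough here: it pushes everything into the pipeline
+        // without stalling the CLI, and the stream is still alive below to finish the job.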
 rec_stream.flush_async();
 
         #[cfg(feature = "web_viewer")]
diff --git a/crates/rerun/src/native_viewer.rs b/crates/rerun/src/native_viewer.rs
index b36ba9d1dc1a..2999879f1f92 100644
--- a/crates/rerun/src/native_viewer.rs
+++ b/crates/rerun/src/native_viewer.rs
@@ -92,4 +92,7 @@ impl re_sdk::sink::LogSink for NativeViewerSink {
             re_log::error_once!("Failed to send log message to viewer: {err}");
         }
     }
+
+    #[inline]
+    fn flush_blocking(&self) {}
 }
diff --git a/crates/rerun/src/web_viewer.rs b/crates/rerun/src/web_viewer.rs
index bc1fc6f3396e..3910b85214cf 100644
--- a/crates/rerun/src/web_viewer.rs
+++ b/crates/rerun/src/web_viewer.rs
@@ -78,6 +78,9 @@ impl crate::sink::LogSink for WebViewerSink {
             re_log::error_once!("Failed to send log message to web server: {err}");
         }
     }
+
+    #[inline]
+    fn flush_blocking(&self) {}
 }
 
 // ----------------------------------------------------------------------------
From a9feba4e9af4dc52a26cc542f031837223c5adc0 Mon Sep 17 00:00:00 2001
From: Jeremy Leibs
Date: Sun, 30 Apr 2023 19:08:42 +0200
Subject: [PATCH 25/31] Restore `start_web_viewer_server` functionality

---
 rerun_py/docs/gen_common_index.py |   3 +-
 rerun_py/src/python_bridge.rs     | 131 ++++++++++++++++--------------
 2 files changed, 69 insertions(+), 65 deletions(-)

diff --git a/rerun_py/docs/gen_common_index.py b/rerun_py/docs/gen_common_index.py
index b159a49858bd..10702ed0a91d 100644
--- a/rerun_py/docs/gen_common_index.py
+++ b/rerun_py/docs/gen_common_index.py
@@ -7,7 +7,6 @@
     Function | Description
     -------- | -----------
     [rerun.init()](initialization/#rerun.init) | Initialize the Rerun SDK ...
-    [rerun.set_recording_id()](initialization/#rerun.set_recording_id) | Set the recording ID ...
     [rerun.connect()](initialization/#rerun.connect) | Connect to a remote Rerun Viewer on the ...
     [rerun.spawn()](initialization/#rerun.spawn) | Spawn a Rerun Viewer ...
 ...
@@ -54,7 +53,7 @@ class Section:
     Section(
         title="Viewer Control",
         module_summary=None,
-        func_list=["set_recording_id", "save"],
+        func_list=["save"],
     ),
     Section(
         title="Time",
diff --git a/rerun_py/src/python_bridge.rs b/rerun_py/src/python_bridge.rs
index 2e9a8cb19ab2..98a303c09784 100644
--- a/rerun_py/src/python_bridge.rs
+++ b/rerun_py/src/python_bridge.rs
@@ -39,13 +39,31 @@ use crate::arrow::get_registered_component_names;
 
 // --- FFI ---
 
-// TODO
-/// The global [`RecordingStream`] object used by the Python API.
+/// The global [`RecordingStream`] object used by the Python API for data.
 fn global_data_stream() -> parking_lot::MutexGuard<'static, Option<RecordingStream>> {
     use once_cell::sync::OnceCell;
     use parking_lot::Mutex;
-    static REC_CTX: OnceCell<Mutex<Option<RecordingStream>>> = OnceCell::new();
-    REC_CTX.get_or_init(Default::default).lock()
+    static DATA_STREAM: OnceCell<Mutex<Option<RecordingStream>>> = OnceCell::new();
+    DATA_STREAM.get_or_init(Default::default).lock()
+}
+
+/// The global [`RecordingStream`] object used by the Python API for blueprints.
+#[allow(dead_code)]
+fn global_blueprint_stream() -> parking_lot::MutexGuard<'static, Option<RecordingStream>> {
+    use once_cell::sync::OnceCell;
+    use parking_lot::Mutex;
+    static BP_STREAM: OnceCell<Mutex<Option<RecordingStream>>> = OnceCell::new();
+    BP_STREAM.get_or_init(Default::default).lock()
+}
+
+#[cfg(feature = "web_viewer")]
+fn global_web_viewer_server(
+) -> parking_lot::MutexGuard<'static, Option<re_web_viewer_server::WebViewerServerHandle>> {
+    use once_cell::sync::OnceCell;
+    use parking_lot::Mutex;
+    static WEB_HANDLE: OnceCell<Mutex<Option<re_web_viewer_server::WebViewerServerHandle>>> =
+        OnceCell::new();
+    WEB_HANDLE.get_or_init(Default::default).lock()
+}
 
 #[pyfunction]
@@ -315,21 +333,20 @@ impl PyMemorySinkStorage {
     }
 }
 
+#[must_use = "the tokio_runtime guard must be kept alive while using tokio"]
+fn enter_tokio_runtime() -> tokio::runtime::EnterGuard<'static> {
+    use once_cell::sync::Lazy;
+    static TOKIO_RUNTIME: Lazy<tokio::runtime::Runtime> =
+        Lazy::new(|| tokio::runtime::Runtime::new().expect("Failed to create tokio runtime"));
+    TOKIO_RUNTIME.enter()
+}
+
 /// Serve a web-viewer.
 #[allow(clippy::unnecessary_wraps)] // False positive
 #[pyfunction]
 fn serve(open_browser: bool, web_port: Option<u16>, ws_port: Option<u16>) -> PyResult<()> {
     #[cfg(feature = "web_viewer")]
     {
-        #[must_use = "the tokio_runtime guard must be kept alive while using tokio"]
-        fn enter_tokio_runtime() -> tokio::runtime::EnterGuard<'static> {
-            use once_cell::sync::Lazy;
-            static TOKIO_RUNTIME: Lazy<tokio::runtime::Runtime> = Lazy::new(|| {
-                tokio::runtime::Runtime::new().expect("Failed to create tokio runtime")
-            });
-            TOKIO_RUNTIME.enter()
-        }
-
         let rec_stream = global_data_stream();
         let Some(rec_stream) = rec_stream.as_ref() else {
             no_active_recording("serve");
@@ -397,39 +414,34 @@ fn flush(py: Python<'_>) {
 
 #[pyfunction]
 // TODO(jleibs) expose this as a python type
+/// Start a web server to host the run web-assets
 fn start_web_viewer_server(port: u16) -> PyResult<()> {
-    Ok(())
 
-    // /// Start a web server to host the run web-asserts
-    // ///
-    // /// The caller needs to ensure that there is a `tokio` runtime running.
-    // #[allow(clippy::unnecessary_wraps)]
-    // #[cfg(feature = "web_viewer")]
-    // pub fn start_web_viewer_server(
-    //     &mut self,
-    //     _web_port: WebViewerServerPort,
-    // ) -> Result<(), PythonSessionError> {
-    //     self.web_viewer_server = Some(re_web_viewer_server::WebViewerServerHandle::new(_web_port)?);

-    //     Ok(())
-    // }

-    // #[cfg(feature = "web_viewer")]
-    // {
-    //     let mut session = python_session();
-    //     let _guard = enter_tokio_runtime();
-    //     session
-    //         .start_web_viewer_server(WebViewerServerPort(port))
-    //         .map_err(|err| PyRuntimeError::new_err(err.to_string()))
-    // }

-    // #[cfg(not(feature = "web_viewer"))]
-    // {
-    //     _ = port;
-    //     Err(PyRuntimeError::new_err(
-    //         "The Rerun SDK was not compiled with the 'web_viewer' feature",
-    //     ))
-    // }
+    #[allow(clippy::unnecessary_wraps)]
+    #[cfg(feature = "web_viewer")]
+    {
+        let mut web_handle = global_web_viewer_server();
+
+        let _guard = enter_tokio_runtime();
+        *web_handle = Some(
+            re_web_viewer_server::WebViewerServerHandle::new(WebViewerServerPort(port)).map_err(
+                |err| {
+                    PyRuntimeError::new_err(format!(
+                        "Failed to start web viewer server on port {port}: {err}",
+                    ))
+                },
+            )?,
+        );
+
+        Ok(())
+    }
+
+    #[cfg(not(feature = "web_viewer"))]
+    {
+        _ = port;
+        Err(PyRuntimeError::new_err(
+            "The Rerun SDK was not compiled with the 'web_viewer' feature",
+        ))
+    }
 }
 
 // --- Time ---
@@ -1131,26 +1143,19 @@ fn get_recording_id() -> PyResult<String> {
 }
 
 #[pyfunction]
+/// Get a url to an instance of the web-viewer
+///
+/// This may point to app.rerun.io or localhost depending on
+/// whether `host_assets` was called.
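+///
+/// A locally hosted viewer takes precedence: if `start_web_viewer_server` was called,
+/// this returns `http://localhost:<port>`; otherwise it points at the web viewer
+/// matching this build's commit on `app.rerun.io`.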
fn get_app_url() -> String { - // TODO - "TODO".to_owned() - - // /// Get a url to an instance of the web-viewer - // /// - // /// This may point to app.rerun.io or localhost depending on - // /// whether `host_assets` was called. - // pub fn get_app_url(&self) -> String { - // #[cfg(feature = "web_viewer")] - // if let Some(hosted_assets) = &self.web_viewer_server { - // return format!("http://localhost:{}", hosted_assets.port()); - // } - - // let short_git_hash = &self.build_info.git_hash[..7]; - // format!("https://app.rerun.io/commit/{short_git_hash}") - // } - - // let session = python_session(); - // session.get_app_url() + #[cfg(feature = "web_viewer")] + if let Some(hosted_assets) = &*global_web_viewer_server() { + return format!("http://localhost:{}", hosted_assets.port()); + } + + let build_info = re_build_info::build_info!(); + let short_git_hash = &build_info.git_hash[..7]; + format!("https://app.rerun.io/commit/{short_git_hash}") } // --- Helpers --- From 9eba5c3ca70536f35dc446641e8154a63e9db309 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Thu, 4 May 2023 17:06:10 +0200 Subject: [PATCH 26/31] clean up --- rerun_py/src/python_bridge.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rerun_py/src/python_bridge.rs b/rerun_py/src/python_bridge.rs index 98a303c09784..d11bd7b02b64 100644 --- a/rerun_py/src/python_bridge.rs +++ b/rerun_py/src/python_bridge.rs @@ -155,7 +155,6 @@ fn no_active_recording(origin: &str) { // --- Init --- -// TODO: actual python object for RecordingStream + make_default logic etc #[pyfunction] #[pyo3(signature = ( application_id, @@ -170,7 +169,6 @@ fn init( application_path: Option, default_enabled: bool, ) -> PyResult<()> { - // TODO: pass this to the builder and let it do that logic // The sentinel file we use to identify the official examples directory. const SENTINEL_FILENAME: &str = ".rerun_examples"; let is_official_example = application_path.map_or(false, |mut path| { @@ -396,7 +394,6 @@ fn disconnect(py: Python<'_>) { }); } -// TODO /// Block until outstanding data has been flushed to the sink #[pyfunction] fn flush(py: Python<'_>) { @@ -1120,7 +1117,7 @@ fn log_arrow_msg(entity_path: &str, components: &PyDict, timeless: bool) -> PyRe return Ok(()); }; - // TODO: do this well + // TODO(cmc): batch all the way to here at some point, maybe. 
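+    // Until then, feed the table into the stream row by row: the batcher sitting behind
+    // `record_row` will re-assemble rows into tables according to its flush thresholds.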
 for row in data_table.to_rows() {
         record_row(rec_stream, row);
     }
From 8a6f14ab35adabe5c4d326d025c70e4d35dfd5fe Mon Sep 17 00:00:00 2001
From: Clement Rey
Date: Thu, 4 May 2023 18:51:01 +0200
Subject: [PATCH 27/31] per-thread per-recording stateful time tracking

---
 Cargo.lock                            |   1 +
 crates/re_sdk/Cargo.toml              |   1 +
 crates/re_sdk/src/recording_stream.rs | 155 ++++++++++++++++-
 examples/python/clock/main.py         |   2 +-
 rerun_py/src/python_bridge.rs         | 239 +++++++++++---------------
 5 files changed, 255 insertions(+), 143 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index f2a5bf1e19b6..30b58b3d3036 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4075,6 +4075,7 @@ dependencies = [
 name = "re_sdk"
 version = "0.6.0-alpha.0"
 dependencies = [
+ "ahash 0.8.2",
 "arrow2_convert",
 "crossbeam",
 "document-features",
diff --git a/crates/re_sdk/Cargo.toml b/crates/re_sdk/Cargo.toml
index 430a60322e9a..5ede53ae620f 100644
--- a/crates/re_sdk/Cargo.toml
+++ b/crates/re_sdk/Cargo.toml
@@ -41,6 +41,7 @@ re_log.workspace = true
 re_memory.workspace = true
 re_sdk_comms = { workspace = true, features = ["client"] }
 
+ahash.workspace = true
 crossbeam.workspace = true
 document-features = "0.2"
 parking_lot.workspace = true
diff --git a/crates/re_sdk/src/recording_stream.rs b/crates/re_sdk/src/recording_stream.rs
index 2c77ddfe4548..9229b96132a7 100644
--- a/crates/re_sdk/src/recording_stream.rs
+++ b/crates/re_sdk/src/recording_stream.rs
@@ -1,9 +1,11 @@
 use std::sync::Arc;
 
+use ahash::HashMap;
 use crossbeam::channel::{Receiver, Sender};
 use re_log_types::{
     ApplicationId, DataRow, DataTable, DataTableBatcher, DataTableBatcherConfig,
-    DataTableBatcherError, LogMsg, RecordingId, RecordingInfo, RecordingSource, Time,
+    DataTableBatcherError, LogMsg, RecordingId, RecordingInfo, RecordingSource, Time, TimeInt,
+    TimePoint, TimeType, Timeline, TimelineName,
 };
 
 use crate::sink::{LogSink, MemorySinkStorage};
@@ -325,9 +327,6 @@ struct RecordingStreamInner {
     batcher: DataTableBatcher,
     batcher_to_sink_handle: Option<std::thread::JoinHandle<()>>,
-    //
-    // TODO(emilk): add convenience `TimePoint` here so that users can
-    // do things like `session.set_time_sequence("frame", frame_idx);`
 }
 
 impl Drop for RecordingStreamInner {
@@ -761,6 +760,154 @@ impl RecordingStream {
     }
 }
 
+// --- Stateful time ---
+
+/// Thread-local data.
+#[derive(Default)]
+struct ThreadInfo {
+    /// The current time per-thread per-recording, which can be set by users.
+    timepoints: HashMap<RecordingId, TimePoint>,
+}
+
+impl ThreadInfo {
+    fn thread_now(rid: RecordingId) -> TimePoint {
+        Self::with(|ti| ti.now(rid))
+    }
+
+    fn set_thread_time(rid: RecordingId, timeline: Timeline, time_int: Option<TimeInt>) {
+        Self::with(|ti| ti.set_time(rid, timeline, time_int));
+    }
+
+    fn reset_thread_time(rid: RecordingId) {
+        Self::with(|ti| ti.reset_time(rid));
+    }
+
+    /// Get access to the thread-local [`ThreadInfo`].
+    fn with<R>(f: impl FnOnce(&mut ThreadInfo) -> R) -> R {
+        use std::cell::RefCell;
+        thread_local! {
{ + static THREAD_INFO: RefCell> = RefCell::new(None); + } + + THREAD_INFO.with(|thread_info| { + let mut thread_info = thread_info.borrow_mut(); + let thread_info = thread_info.get_or_insert_with(ThreadInfo::default); + f(thread_info) + }) + } + + fn now(&self, rid: RecordingId) -> TimePoint { + let mut timepoint = self.timepoints.get(&rid).cloned().unwrap_or_default(); + timepoint.insert(Timeline::log_time(), Time::now().into()); + timepoint + } + + fn set_time(&mut self, rid: RecordingId, timeline: Timeline, time_int: Option) { + if let Some(time_int) = time_int { + self.timepoints + .entry(rid) + .or_default() + .insert(timeline, time_int); + } else if let Some(timepoint) = self.timepoints.get_mut(&rid) { + timepoint.remove(&timeline); + } + } + + fn reset_time(&mut self, rid: RecordingId) { + if let Some(timepoint) = self.timepoints.get_mut(&rid) { + *timepoint = TimePoint::default(); + } + } +} + +impl RecordingStream { + /// Returns the current time of the recording on the current thread. + pub fn now(&self) -> TimePoint { + let Some(this) = &*self.inner else { + re_log::debug!("Recording disabled - call to now() ignored"); + return TimePoint::default(); + }; + + ThreadInfo::thread_now(this.info.recording_id) + } + + /// Set the current time of the recording, for the current calling thread. + /// Used for all subsequent logging performed from this same thread, until the next call to + /// [`Self::set_time_sequence`]. + /// + /// For example: `rec.set_time_sequence("frame_nr", frame_nr)`. + /// + /// You can remove a timeline again using `set_time_sequence("frame_nr", None)`. + pub fn set_time_sequence(&self, timeline: impl Into, sequence: Option) { + let Some(this) = &*self.inner else { + re_log::debug!("Recording disabled - call to set_time_sequence() ignored"); + return; + }; + + ThreadInfo::set_thread_time( + this.info.recording_id, + Timeline::new(timeline, TimeType::Sequence), + sequence.map(TimeInt::from), + ); + } + + /// Set the current time of the recording, for the current calling thread. + /// Used for all subsequent logging performed from this same thread, until the next call to + /// [`Self::set_time_seconds`]. + /// + /// For example: `rec.set_time_seconds("sim_time", sim_time_secs)`. + /// + /// You can remove a timeline again using `rec.set_time_seconds("sim_time", None)`. + pub fn set_time_seconds(&self, timeline: &str, seconds: Option) { + let Some(this) = &*self.inner else { + re_log::debug!("Recording disabled - call to set_time_seconds() ignored"); + return; + }; + + ThreadInfo::set_thread_time( + this.info.recording_id, + Timeline::new(timeline, TimeType::Time), + seconds.map(|secs| Time::from_seconds_since_epoch(secs).into()), + ); + } + + /// Set the current time of the recording, for the current calling thread. + /// Used for all subsequent logging performed from this same thread, until the next call to + /// [`Self::set_time_nanos`]. + /// + /// For example: `rec.set_time_seconds("sim_time", sim_time_nanos)`. + /// + /// You can remove a timeline again using `rec.set_time_seconds("sim_time", None)`. + pub fn set_time_nanos(&self, timeline: &str, ns: Option) { + let Some(this) = &*self.inner else { + re_log::debug!("Recording disabled - call to set_time_nanos() ignored"); + return; + }; + + ThreadInfo::set_thread_time( + this.info.recording_id, + Timeline::new(timeline, TimeType::Time), + ns.map(|ns| Time::from_ns_since_epoch(ns).into()), + ); + } + + /// Clears out the current time of the recording, for the current calling thread. 
+    /// Used for all subsequent logging performed from this same thread, until the next call to
+    /// [`Self::set_time_sequence`]/[`Self::set_time_seconds`]/[`Self::set_time_nanos`].
+    ///
+    /// For example: `rec.reset_time()`.
+    pub fn reset_time(&self) {
+        let Some(this) = &*self.inner else {
+            re_log::debug!("Recording disabled - call to reset_time() ignored");
+            return;
+        };
+
+        ThreadInfo::reset_thread_time(this.info.recording_id);
+    }
+}
+
+// ---
+
 #[cfg(test)]
 mod tests {
     use re_log_types::RowId;
diff --git a/examples/python/clock/main.py b/examples/python/clock/main.py
index 50aa1e8cda1b..2b5fb96e4c93 100755
--- a/examples/python/clock/main.py
+++ b/examples/python/clock/main.py
@@ -59,7 +59,7 @@ def rotate(angle: float, len: float) -> Tuple[float, float, float]:
 
         scaled_h = (t_secs % 43200) / 43200.0
         point_h = np.array(rotate(math.tau * scaled_h, LENGTH_H))
-        color_h = (int(255 - (scaled_h * 255)), int(scaled_h * 255), 255, 128)
+        color_h = (int(255 - (scaled_h * 255)), int(scaled_h * 255), 255, 255)
         rr.log_point("world/hours_pt", position=point_h, color=color_h)
         rr.log_arrow("world/hours_hand", origin=[0.0, 0.0, 0.0], vector=point_h, color=color_h, width_scale=WIDTH_H)
 
diff --git a/rerun_py/src/python_bridge.rs b/rerun_py/src/python_bridge.rs
index d11bd7b02b64..4e23046e5024 100644
--- a/rerun_py/src/python_bridge.rs
+++ b/rerun_py/src/python_bridge.rs
@@ -15,7 +15,7 @@ use re_log_types::DataRow;
 use rerun::{
     log::{PathOp, RowId},
     sink::MemorySinkStorage,
-    time::{Time, TimeInt, TimePoint, TimeType, Timeline},
+    time::TimePoint,
     EntityPath, RecordingId, RecordingStream, RecordingStreamBuilder,
 };
 
@@ -39,6 +39,8 @@ use crate::arrow::get_registered_component_names;
 
 // --- FFI ---
 
+// we don't even need mutexes...
+
 /// The global [`RecordingStream`] object used by the Python API for data.
 fn global_data_stream() -> parking_lot::MutexGuard<'static, Option<RecordingStream>> {
     use once_cell::sync::OnceCell;
@@ -192,8 +194,8 @@ fn init(
         default_recording_id(py)
     };
 
-    let mut rec_stream = global_data_stream();
-    *rec_stream = RecordingStreamBuilder::new(application_id)
+    let mut data_stream = global_data_stream();
+    *data_stream = RecordingStreamBuilder::new(application_id)
         .is_official_example(is_official_example)
         .recording_id(recording_id)
         .recording_source(re_log_types::RecordingSource::PythonSdk(python_version(py)))
@@ -260,7 +262,7 @@ authkey = multiprocessing.current_process().authkey
 fn is_enabled() -> bool {
     global_data_stream()
         .as_ref()
-        .map_or(false, |rec_stream| rec_stream.is_enabled())
+        .map_or(false, |data_stream| data_stream.is_enabled())
 }
 
 #[pyfunction]
@@ -275,8 +277,8 @@ fn shutdown(py: Python<'_>) {
 
 #[pyfunction]
 fn connect(addr: Option<String>) -> PyResult<()> {
-    let rec_stream = global_data_stream();
-    let Some(rec_stream) = rec_stream.as_ref() else {
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
        no_active_recording("connect");
         return Ok(());
     };
@@ -287,20 +289,20 @@ fn connect(addr: Option<String>) -> PyResult<()> {
         rerun::default_server_addr()
     };
 
-    rec_stream.connect(addr);
+    data_stream.connect(addr);
 
     Ok(())
 }
 
 #[pyfunction]
 fn save(path: &str) -> PyResult<()> {
-    let rec_stream = global_data_stream();
-    let Some(rec_stream) = rec_stream.as_ref() else {
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
         no_active_recording("save");
         return Ok(());
     };
 
-    rec_stream
+    data_stream
         .save(path)
         .map_err(|err| PyRuntimeError::new_err(err.to_string()))
 }
@@ -308,13 +310,13 @@ fn save(path: &str) -> PyResult<()> {
 /// Create an in-memory rrd file
 #[pyfunction]
 fn memory_recording() -> PyMemorySinkStorage {
-    let rec_stream = global_data_stream();
-    let Some(rec_stream) = rec_stream.as_ref() else {
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
         no_active_recording("memory_recording");
         return Default::default();
     };
 
-    PyMemorySinkStorage(rec_stream.memory())
+    PyMemorySinkStorage(data_stream.memory())
 }
 
 #[pyclass]
@@ -331,6 +333,7 @@ impl PyMemorySinkStorage {
     }
 }
 
+#[cfg(feature = "web_viewer")]
 #[must_use = "the tokio_runtime guard must be kept alive while using tokio"]
 fn enter_tokio_runtime() -> tokio::runtime::EnterGuard<'static> {
     use once_cell::sync::Lazy;
@@ -345,15 +348,15 @@ fn enter_tokio_runtime() -> tokio::runtime::EnterGuard<'static> {
 fn serve(open_browser: bool, web_port: Option<u16>, ws_port: Option<u16>) -> PyResult<()> {
     #[cfg(feature = "web_viewer")]
     {
-        let rec_stream = global_data_stream();
-        let Some(rec_stream) = rec_stream.as_ref() else {
+        let data_stream = global_data_stream();
+        let Some(data_stream) = data_stream.as_ref() else {
             no_active_recording("serve");
             return Ok(());
         };
 
         let _guard = enter_tokio_runtime();
 
-        rec_stream.set_sink(
+        data_stream.set_sink(
             rerun::web_viewer::new_sink(
                 open_browser,
                 web_port.map(WebViewerServerPort).unwrap_or_default(),
@@ -385,12 +388,12 @@ fn disconnect(py: Python<'_>) {
     // Release the GIL in case any flushing behavior needs to
     // cleanup a python object.
     py.allow_threads(|| {
-        let rec_stream = global_data_stream();
-        let Some(rec_stream) = rec_stream.as_ref() else {
+        let data_stream = global_data_stream();
+        let Some(data_stream) = data_stream.as_ref() else {
             no_active_recording("disconnect");
             return;
         };
-        rec_stream.disconnect();
+        data_stream.disconnect();
     });
 }
 
@@ -400,12 +403,12 @@ fn flush(py: Python<'_>) {
     // Release the GIL in case any flushing behavior needs to
     // cleanup a python object.
     py.allow_threads(|| {
-        let rec_stream = global_data_stream();
-        let Some(rec_stream) = rec_stream.as_ref() else {
+        let data_stream = global_data_stream();
+        let Some(data_stream) = data_stream.as_ref() else {
             no_active_recording("flush");
             return;
         };
-        rec_stream.flush_blocking();
+        data_stream.flush_blocking();
     });
 }
 
@@ -443,92 +446,52 @@ fn start_web_viewer_server(port: u16) -> PyResult<()> {
 
 // --- Time ---
 
-/// Set the current time globally. Used for all subsequent logging,
-/// until the next call to `set_time_sequence`.
-///
-/// For example: `set_time_sequence("frame_nr", frame_nr)`.
-///
-/// You can remove a timeline again using `set_time_sequence("frame_nr", None)`.
+fn time(timeless: bool, rec: &RecordingStream) -> TimePoint {
+    if timeless {
+        TimePoint::timeless()
+    } else {
+        rec.now()
+    }
+}
+
 #[pyfunction]
 fn set_time_sequence(timeline: &str, sequence: Option<i64>) {
-    ThreadInfo::set_thread_time(
-        Timeline::new(timeline, TimeType::Sequence),
-        sequence.map(TimeInt::from),
-    );
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
+        no_active_recording("set_time_sequence");
+        return;
+    };
+    data_stream.set_time_sequence(timeline, sequence);
 }
 
 #[pyfunction]
 fn set_time_seconds(timeline: &str, seconds: Option<f64>) {
-    ThreadInfo::set_thread_time(
-        Timeline::new(timeline, TimeType::Time),
-        seconds.map(|secs| Time::from_seconds_since_epoch(secs).into()),
-    );
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
+        no_active_recording("set_time_seconds");
+        return;
+    };
+    data_stream.set_time_seconds(timeline, seconds);
 }
 
 #[pyfunction]
-fn set_time_nanos(timeline: &str, ns: Option<i64>) {
-    ThreadInfo::set_thread_time(
-        Timeline::new(timeline, TimeType::Time),
-        ns.map(|ns| Time::from_ns_since_epoch(ns).into()),
-    );
+fn set_time_nanos(timeline: &str, nanos: Option<i64>) {
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
+        no_active_recording("set_time_nanos");
+        return;
+    };
+    data_stream.set_time_nanos(timeline, nanos);
 }
 
 #[pyfunction]
 fn reset_time() {
-    ThreadInfo::reset_thread_time();
-}
-
-/// Thread-local info
-#[derive(Default)]
-struct ThreadInfo {
-    /// The current time, which can be set by users.
-    time_point: TimePoint,
-}
-
-impl ThreadInfo {
-    pub fn thread_now() -> TimePoint {
-        Self::with(|ti| ti.now())
-    }
-
-    pub fn set_thread_time(timeline: Timeline, time_int: Option<TimeInt>) {
-        Self::with(|ti| ti.set_time(timeline, time_int));
-    }
-
-    pub fn reset_thread_time() {
-        Self::with(|ti| ti.reset_time());
-    }
-
-    /// Get access to the thread-local [`ThreadInfo`].
-    fn with<R>(f: impl FnOnce(&mut ThreadInfo) -> R) -> R {
-        use std::cell::RefCell;
-        thread_local! {
-            static THREAD_INFO: RefCell<Option<ThreadInfo>> = RefCell::new(None);
-        }
-
-        THREAD_INFO.with(|thread_info| {
-            let mut thread_info = thread_info.borrow_mut();
-            let thread_info = thread_info.get_or_insert_with(ThreadInfo::default);
-            f(thread_info)
-        })
-    }
-
-    fn now(&self) -> TimePoint {
-        let mut time_point = self.time_point.clone();
-        time_point.insert(Timeline::log_time(), Time::now().into());
-        time_point
-    }
-
-    fn set_time(&mut self, timeline: Timeline, time_int: Option<TimeInt>) {
-        if let Some(time_int) = time_int {
-            self.time_point.insert(timeline, time_int);
-        } else {
-            self.time_point.remove(&timeline);
-        }
-    }
-
-    fn reset_time(&mut self) {
-        self.time_point = TimePoint::default();
-    }
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
+        no_active_recording("reset_time");
+        return;
+    };
+    data_stream.reset_time();
 }
 
 // --- Log transforms ---
@@ -582,8 +545,8 @@ fn log_transform(
     transform: re_log_types::Transform,
     timeless: bool,
 ) -> PyResult<()> {
-    let rec_stream = global_data_stream();
-    let Some(rec_stream) = rec_stream.as_ref() else {
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
         no_active_recording("log_transform");
         return Ok(());
     };
@@ -592,7 +555,7 @@ fn log_transform(
     if entity_path.is_root() {
         return Err(PyTypeError::new_err("Transforms are between a child entity and its parent, so the root cannot have a transform"));
     }
-    let time_point = time(timeless);
+    let time_point = time(timeless, data_stream);
 
     // We currently log arrow transforms from inside the bridge because we are
     // using glam and macaw to potentially do matrix-inversion as part of the
@@ -608,7 +571,7 @@ fn log_transform(
         [transform].as_slice(),
     );
 
-    record_row(rec_stream, row);
+    record_row(data_stream, row);
 
     Ok(())
 }
@@ -660,8 +623,8 @@ fn log_view_coordinates(
     coordinates: ViewCoordinates,
     timeless: bool,
 ) -> PyResult<()> {
-    let rec_stream = global_data_stream();
-    let Some(rec_stream) = rec_stream.as_ref() else {
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
         re_log::debug!("No active recording - call to log_view_coordinates() ignored");
         return Ok(());
     };
@@ -677,7 +640,7 @@ fn log_view_coordinates(
         parse_entity_path(entity_path_str)?
     };
 
-    let time_point = time(timeless);
+    let time_point = time(timeless, data_stream);
 
     // We currently log view coordinates from inside the bridge because the code
     // that does matching and validation on different string representations is
@@ -693,7 +656,7 @@ fn log_view_coordinates(
         [coordinates].as_slice(),
     );
 
-    record_row(rec_stream, row);
+    record_row(data_stream, row);
 
     Ok(())
 }
@@ -725,8 +688,8 @@ fn log_annotation_context(
     class_descriptions: Vec<ClassDescriptionTuple>,
     timeless: bool,
 ) -> PyResult<()> {
-    let rec_stream = global_data_stream();
-    let Some(rec_stream) = rec_stream.as_ref() else {
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
         re_log::debug!("No active recording - call to log_annotation_context() ignored");
         return Ok(());
     };
@@ -757,7 +720,7 @@ fn log_annotation_context(
         });
     }
 
-    let time_point = time(timeless);
+    let time_point = time(timeless, data_stream);
 
     // We currently log AnnotationContext from inside the bridge because it's a
     // fairly complex type with a need for a fair amount of data-validation. We
@@ -774,7 +737,7 @@ fn log_annotation_context(
         [annotation_context].as_slice(),
     );
 
-    record_row(rec_stream, row);
+    record_row(data_stream, row);
 
     Ok(())
 }
@@ -791,8 +754,8 @@ fn log_meshes(
     albedo_factors: Vec<Option<numpy::PyReadonlyArray1<'_, f32>>>,
     timeless: bool,
 ) -> PyResult<()> {
-    let rec_stream = global_data_stream();
-    let Some(rec_stream) = rec_stream.as_ref() else {
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
         no_active_recording("log_meshes");
         return Ok(());
     };
@@ -816,7 +779,7 @@ fn log_meshes(
         )));
     }
 
-    let time_point = time(timeless);
+    let time_point = time(timeless, data_stream);
 
     let mut meshes = Vec::with_capacity(position_buffers.len());
 
@@ -896,7 +859,7 @@ fn log_meshes(
         meshes,
     );
 
-    record_row(rec_stream, row);
+    record_row(data_stream, row);
 
     Ok(())
 }
@@ -909,8 +872,8 @@ fn log_mesh_file(
     transform: numpy::PyReadonlyArray2<'_, f32>,
     timeless: bool,
 ) -> PyResult<()> {
-    let rec_stream = global_data_stream();
-    let Some(rec_stream) = rec_stream.as_ref() else {
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
         no_active_recording("log_mesh_file");
         return Ok(());
     };
@@ -953,7 +916,7 @@ fn log_mesh_file(
         ]
     };
 
-    let time_point = time(timeless);
+    let time_point = time(timeless, data_stream);
 
     let mesh3d = Mesh3D::Encoded(EncodedMesh3D {
         mesh_id: MeshId::random(),
@@ -977,7 +940,7 @@ fn log_mesh_file(
         [mesh3d].as_slice(),
     );
 
-    record_row(rec_stream, row);
+    record_row(data_stream, row);
 
     Ok(())
 }
@@ -994,8 +957,8 @@ fn log_image_file(
     img_format: Option<&str>,
     timeless: bool,
 ) -> PyResult<()> {
-    let rec_stream = global_data_stream();
-    let Some(rec_stream) = rec_stream.as_ref() else {
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
         no_active_recording("log_image_file");
         return Ok(());
     };
@@ -1047,7 +1010,7 @@ fn log_image_file(
         }
     };
 
-    let time_point = time(timeless);
+    let time_point = time(timeless, data_stream);
 
     let tensor = re_log_types::component_types::Tensor {
         tensor_id: TensorId::random(),
@@ -1069,7 +1032,7 @@ fn log_image_file(
         [tensor].as_slice(),
     );
 
-    record_row(rec_stream, row);
+    record_row(data_stream, row);
 
     Ok(())
 }
@@ -1087,16 +1050,16 @@ enum TensorDataMeaning {
 
 #[pyfunction]
 fn log_cleared(entity_path: &str, recursive: bool) -> PyResult<()> {
-    let rec_stream = global_data_stream();
-    let Some(rec_stream) = rec_stream.as_ref() else {
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
         no_active_recording("log_cleared");
         return Ok(());
     };
 
     let entity_path = parse_entity_path(entity_path)?;
-    let timepoint = time(false);
+    let timepoint = time(false, data_stream);
 
-    rec_stream.record_path_op(timepoint, PathOp::clear(recursive, entity_path));
+    data_stream.record_path_op(timepoint, PathOp::clear(recursive, entity_path));
 
     Ok(())
 }
@@ -1104,22 +1067,30 @@ fn log_cleared(entity_path: &str, recursive: bool) -> PyResult<()> {
 
 #[pyfunction]
 fn log_arrow_msg(entity_path: &str, components: &PyDict, timeless: bool) -> PyResult<()> {
     let entity_path = parse_entity_path(entity_path)?;
+    let timepoint = {
+        let data_stream = global_data_stream();
+        let Some(data_stream) = data_stream.as_ref() else {
+            no_active_recording("log_arrow_msg");
+            return Ok(());
+        };
+        time(timeless, data_stream)
+    };
 
     // It's important that we don't hold the session lock while building our arrow component.
     // the API we call back through pyarrow temporarily releases the GIL, which can
     // cause a deadlock.
     let data_table =
-        crate::arrow::build_data_table_from_components(&entity_path, components, &time(timeless))?;
+        crate::arrow::build_data_table_from_components(&entity_path, components, &timepoint)?;
 
-    let rec_stream = global_data_stream();
-    let Some(rec_stream) = rec_stream.as_ref() else {
+    let data_stream = global_data_stream();
+    let Some(data_stream) = data_stream.as_ref() else {
         no_active_recording("log_arrow_msg");
         return Ok(());
     };
 
     // TODO(cmc): batch all the way to here at some point, maybe.
     for row in data_table.to_rows() {
-        record_row(rec_stream, row);
+        record_row(data_stream, row);
     }
 
     Ok(())
@@ -1131,7 +1102,7 @@ fn log_arrow_msg(entity_path: &str, components: &PyDict, timeless: bool) -> PyRe
 fn get_recording_id() -> PyResult<String> {
     let recording_id = global_data_stream()
         .as_ref()
-        .and_then(|rec_stream| rec_stream.recording_info().map(|info| info.recording_id));
+        .and_then(|data_stream| data_stream.recording_info().map(|info| info.recording_id));
 
     match recording_id {
         Some(id) => Ok(id.to_string()),
@@ -1184,8 +1155,8 @@ fn slice_from_np_array<'a, T: numpy::Element, D: numpy::ndarray::Dimension>(
     Cow::Owned(array.iter().cloned().collect())
 }
 
-fn record_row(rec_stream: &RecordingStream, row: DataRow) {
-    rec_stream.record_row(row);
+fn record_row(data_stream: &RecordingStream, row: DataRow) {
+    data_stream.record_row(row);
 }
 
 fn parse_entity_path(entity_path: &str) -> PyResult<EntityPath> {
@@ -1199,11 +1170,3 @@ fn parse_entity_path(entity_path: &str) -> PyResult<EntityPath> {
         Ok(EntityPath::from(components))
     }
 }
-
-fn time(timeless: bool) -> TimePoint {
-    if timeless {
-        TimePoint::timeless()
-    } else {
-        ThreadInfo::thread_now()
-    }
-}

From aac28aa4a025a6cdaba8f69a6e5eaab82e5e2490 Mon Sep 17 00:00:00 2001
From: Clement Rey
Date: Thu, 4 May 2023 20:53:35 +0200
Subject: [PATCH 28/31] just build rows directly, thereby _not_ preventing size computation

---
 rerun_py/src/arrow.rs         | 10 ++++------
 rerun_py/src/python_bridge.rs | 10 ++--------
 2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/rerun_py/src/arrow.rs b/rerun_py/src/arrow.rs
index 0a0d7e5663cd..c07a5bb08269 100644
--- a/rerun_py/src/arrow.rs
+++ b/rerun_py/src/arrow.rs
@@ -80,12 +80,12 @@ pub fn get_registered_component_names(py: pyo3::Python<'_>) -> PyResult<&PyDict>
     Ok(fields.into_py_dict(py))
 }
 
-/// Build a [`DataTable`] given a '**kwargs'-style dictionary of component arrays.
-pub fn build_data_table_from_components(
+/// Build a [`DataRow`] given a '**kwargs'-style dictionary of component arrays.
+pub fn build_data_row_from_components(
     entity_path: &EntityPath,
     components: &PyDict,
     time_point: &TimePoint,
-) -> PyResult<DataTable> {
+) -> PyResult<DataRow> {
     let (arrays, fields): (Vec<Box<dyn Array>>, Vec<Field>) = itertools::process_results(
         components.iter().map(|(name, array)| {
             let name = name.downcast::<PyString>()?.to_str()?;
@@ -109,7 +109,5 @@ pub fn build_data_table_from_components(
         cells,
     );
 
-    let data_table = row.into_table();
-
-    Ok(data_table)
+    Ok(row)
 }
diff --git a/rerun_py/src/python_bridge.rs b/rerun_py/src/python_bridge.rs
index 4e23046e5024..eb004fecc122 100644
--- a/rerun_py/src/python_bridge.rs
+++ b/rerun_py/src/python_bridge.rs
@@ -39,8 +39,6 @@ use crate::arrow::get_registered_component_names;
 
 // --- FFI ---
 
-// we don't even need mutexes...
-
 /// The global [`RecordingStream`] object used by the Python API for data.
 fn global_data_stream() -> parking_lot::MutexGuard<'static, Option<RecordingStream>> {
     use once_cell::sync::OnceCell;
@@ -1079,8 +1077,7 @@ fn log_arrow_msg(entity_path: &str, components: &PyDict, timeless: bool) -> PyRe
 
     // It's important that we don't hold the session lock while building our arrow component.
     // the API we call back through pyarrow temporarily releases the GIL, which can
     // cause a deadlock.
-    let data_table =
-        crate::arrow::build_data_table_from_components(&entity_path, components, &timepoint)?;
+    let row = crate::arrow::build_data_row_from_components(&entity_path, components, &timepoint)?;
 
     let data_stream = global_data_stream();
     let Some(data_stream) = data_stream.as_ref() else {
@@ -1088,10 +1085,7 @@ fn log_arrow_msg(entity_path: &str, components: &PyDict, timeless: bool) -> PyRe
         return Ok(());
     };
 
-    // TODO(cmc): batch all the way to here at some point, maybe.
-    for row in data_table.to_rows() {
-        record_row(data_stream, row);
-    }
+    record_row(data_stream, row);
 
     Ok(())
 }

From 7c1e97af96d67f0dc602a5e48c73e0ad5b1b283f Mon Sep 17 00:00:00 2001
From: Clement Rey
Date: Thu, 4 May 2023 21:20:13 +0200
Subject: [PATCH 29/31] get_recording_id might return nothing now

---
 rerun_py/rerun_sdk/rerun/__init__.py | 14 +++++++++++---
 rerun_py/src/python_bridge.rs        | 15 ++++++---------
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/rerun_py/rerun_sdk/rerun/__init__.py b/rerun_py/rerun_sdk/rerun/__init__.py
index b17b919217e4..e000f3cd99db 100644
--- a/rerun_py/rerun_sdk/rerun/__init__.py
+++ b/rerun_py/rerun_sdk/rerun/__init__.py
@@ -84,9 +84,11 @@ def unregister_shutdown() -> None:
 # -----------------------------------------------------------------------------
 
 
-def get_recording_id() -> str:
+def get_recording_id() -> Optional[str]:
     """
-    Get the recording ID that this process is logging to, as a UUIDv4.
+    Get the recording ID that this process is logging to, as a UUIDv4, if any.
+
+    You must have called [`rr.init`][] first in order to have an active recording.
 
     The default recording_id is based on `multiprocessing.current_process().authkey`
     which means that all processes spawned with `multiprocessing`
@@ -103,7 +105,10 @@ def get_recording_id() -> str:
         The recording ID that this process is logging to.
 
     """
-    return str(bindings.get_recording_id())
+    rid = bindings.get_recording_id()
+    if rid:
+        return str(rid)
+    return None
 
 
 def init(
@@ -116,6 +121,9 @@ def init(
     """
     Initialize the Rerun SDK with a user-chosen application id (name).
 
+    You must call this function first in order to initialize a global recording.
+    Without an active recording, all methods of the SDK will turn into no-ops.
+
     Parameters
     ----------
     application_id : str
diff --git a/rerun_py/src/python_bridge.rs b/rerun_py/src/python_bridge.rs
index eb004fecc122..1d56f8f596f3 100644
--- a/rerun_py/src/python_bridge.rs
+++ b/rerun_py/src/python_bridge.rs
@@ -1093,15 +1093,12 @@ fn log_arrow_msg(entity_path: &str, components: &PyDict, timeless: bool) -> PyRe
 // --- Misc ---
 
 #[pyfunction]
-fn get_recording_id() -> PyResult<String> {
-    let recording_id = global_data_stream()
-        .as_ref()
-        .and_then(|data_stream| data_stream.recording_info().map(|info| info.recording_id));
-
-    match recording_id {
-        Some(id) => Ok(id.to_string()),
-        None => Err(PyTypeError::new_err("module has not been initialized")),
-    }
+fn get_recording_id() -> Option<String> {
+    global_data_stream().as_ref().and_then(|data_stream| {
+        data_stream
+            .recording_info()
+            .map(|info| info.recording_id.to_string())
+    })
 }
 
 #[pyfunction]

From 57e05944f78e36e2467382498927cfc1bbfa7522 Mon Sep 17 00:00:00 2001
From: Clement Rey
Date: Thu, 4 May 2023 22:22:52 +0200
Subject: [PATCH 30/31] make a lack of active recording a warn_once situation across both languages and all weird situations

---
 crates/re_sdk/src/recording_stream.rs         | 22 +++++++++----------
 rerun_py/rerun_sdk/rerun/__init__.py          | 11 +++++-----
 rerun_py/rerun_sdk/rerun/log/log_decorator.py |  3 +++
 rerun_py/src/python_bridge.rs                 |  4 +++-
 4 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/crates/re_sdk/src/recording_stream.rs b/crates/re_sdk/src/recording_stream.rs
index 9229b96132a7..d954b03b1cb5 100644
--- a/crates/re_sdk/src/recording_stream.rs
+++ b/crates/re_sdk/src/recording_stream.rs
@@ -572,7 +572,7 @@ impl RecordingStream {
     #[inline]
     pub fn record_msg(&self, msg: LogMsg) {
         let Some(this) = &*self.inner else {
-            re_log::debug!("Recording disabled - call to record_msg() ignored");
+            re_log::warn_once!("Recording disabled - call to record_msg() ignored");
             return;
         };
 
@@ -592,7 +592,7 @@ impl RecordingStream {
         path_op: re_log_types::PathOp,
     ) {
         let Some(this) = &*self.inner else {
-            re_log::debug!("Recording disabled - call to record_path_op() ignored");
+            re_log::warn_once!("Recording disabled - call to record_path_op() ignored");
             return;
         };
 
@@ -613,7 +613,7 @@ impl RecordingStream {
     #[inline]
     pub fn record_row(&self, row: DataRow) {
         let Some(this) = &*self.inner else {
-            re_log::debug!("Recording disabled - call to record_row() ignored");
+            re_log::warn_once!("Recording disabled - call to record_row() ignored");
             return;
         };
 
@@ -636,7 +636,7 @@ impl RecordingStream {
     /// cannot be repaired), all pending data in its buffers will be dropped.
     pub fn set_sink(&self, sink: Box<dyn LogSink>) {
         let Some(this) = &*self.inner else {
-            re_log::debug!("Recording disabled - call to set_sink() ignored");
+            re_log::warn_once!("Recording disabled - call to set_sink() ignored");
             return;
         };
 
@@ -665,7 +665,7 @@ impl RecordingStream {
     /// See [`RecordingStream`] docs for ordering semantics and multithreading guarantees.
     pub fn flush_async(&self) {
         let Some(this) = &*self.inner else {
-            re_log::debug!("Recording disabled - call to flush_async() ignored");
+            re_log::warn_once!("Recording disabled - call to flush_async() ignored");
             return;
         };
 
@@ -692,7 +692,7 @@ impl RecordingStream {
     /// See [`RecordingStream`] docs for ordering semantics and multithreading guarantees.
     pub fn flush_blocking(&self) {
         let Some(this) = &*self.inner else {
-            re_log::debug!("Recording disabled - call to flush_blocking() ignored");
+            re_log::warn_once!("Recording disabled - call to flush_blocking() ignored");
             return;
         };
 
@@ -824,7 +824,7 @@ impl RecordingStream {
     /// Returns the current time of the recording on the current thread.
     pub fn now(&self) -> TimePoint {
         let Some(this) = &*self.inner else {
-            re_log::debug!("Recording disabled - call to now() ignored");
+            re_log::warn_once!("Recording disabled - call to now() ignored");
             return TimePoint::default();
         };
 
@@ -840,7 +840,7 @@ impl RecordingStream {
     /// You can remove a timeline again using `set_time_sequence("frame_nr", None)`.
     pub fn set_time_sequence(&self, timeline: impl Into<TimelineName>, sequence: Option<i64>) {
         let Some(this) = &*self.inner else {
-            re_log::debug!("Recording disabled - call to set_time_sequence() ignored");
+            re_log::warn_once!("Recording disabled - call to set_time_sequence() ignored");
             return;
         };
 
@@ -860,7 +860,7 @@ impl RecordingStream {
     /// You can remove a timeline again using `rec.set_time_seconds("sim_time", None)`.
     pub fn set_time_seconds(&self, timeline: &str, seconds: Option<f64>) {
         let Some(this) = &*self.inner else {
-            re_log::debug!("Recording disabled - call to set_time_seconds() ignored");
+            re_log::warn_once!("Recording disabled - call to set_time_seconds() ignored");
             return;
         };
 
@@ -880,7 +880,7 @@ impl RecordingStream {
     /// You can remove a timeline again using `rec.set_time_nanos("sim_time", None)`.
     pub fn set_time_nanos(&self, timeline: &str, ns: Option<i64>) {
         let Some(this) = &*self.inner else {
-            re_log::debug!("Recording disabled - call to set_time_nanos() ignored");
+            re_log::warn_once!("Recording disabled - call to set_time_nanos() ignored");
             return;
         };
 
@@ -898,7 +898,7 @@ impl RecordingStream {
     /// For example: `rec.reset_time()`.
     pub fn reset_time(&self) {
         let Some(this) = &*self.inner else {
-            re_log::debug!("Recording disabled - call to reset_time() ignored");
+            re_log::warn_once!("Recording disabled - call to reset_time() ignored");
             return;
         };
 
diff --git a/rerun_py/rerun_sdk/rerun/__init__.py b/rerun_py/rerun_sdk/rerun/__init__.py
index e000f3cd99db..16a26610bcd1 100644
--- a/rerun_py/rerun_sdk/rerun/__init__.py
+++ b/rerun_py/rerun_sdk/rerun/__init__.py
@@ -1,6 +1,7 @@
 """The Rerun Python SDK, which is a wrapper around the re_sdk crate."""
 
 import atexit
+import logging
 from typing import Optional
 
 import rerun_bindings as bindings  # type: ignore[attr-defined]
@@ -279,7 +280,7 @@ def connect(addr: Optional[str] = None) -> None:
 
     """
     if not bindings.is_enabled():
-        print("Rerun is disabled - connect() call ignored")
+        logging.warning("Rerun is disabled - connect() call ignored")
         return
 
     bindings.connect(addr)
@@ -307,7 +308,7 @@ def spawn(port: int = 9876, connect: bool = True) -> None:
 
     """
    if not bindings.is_enabled():
-        print("Rerun is disabled - spawn() call ignored")
+        logging.warning("Rerun is disabled - spawn() call ignored")
         return
 
     import os
@@ -360,7 +361,7 @@ def serve(open_browser: bool = True, web_port: Optional[int] = None, ws_port: Op
 
     """
     if not bindings.is_enabled():
-        print("Rerun is disabled - serve() call ignored")
+        logging.warning("Rerun is disabled - serve() call ignored")
         return
 
     bindings.serve(open_browser, web_port, ws_port)
@@ -385,7 +386,7 @@ def start_web_viewer_server(port: int = 0) -> None:
 
     """
     if not bindings.is_enabled():
-        print("Rerun is disabled - self_host_assets() call ignored")
+        logging.warning("Rerun is disabled - self_host_assets() call ignored")
         return
 
     bindings.start_web_viewer_server(port)
@@ -414,7 +415,7 @@ def save(path: str) -> None:
 
     """
     if not bindings.is_enabled():
-        print("Rerun is disabled - save() call ignored")
+        logging.warning("Rerun is disabled - save() call ignored")
         return
 
     bindings.save(path)
diff --git a/rerun_py/rerun_sdk/rerun/log/log_decorator.py b/rerun_py/rerun_sdk/rerun/log/log_decorator.py
index d7be628412cc..4fbb0b386eac 100644
--- a/rerun_py/rerun_sdk/rerun/log/log_decorator.py
+++ b/rerun_py/rerun_sdk/rerun/log/log_decorator.py
@@ -1,6 +1,7 @@
 import functools
 import logging
 import traceback
+import warnings
 from typing import Any, Callable, TypeVar, cast
 
 import rerun
@@ -25,6 +26,8 @@ def log_decorator(func: _TFunc) -> _TFunc:
     @functools.wraps(func)
     def wrapper(*args: Any, **kwargs: Any) -> Any:
         if not bindings.is_enabled():
+            # NOTE: use `warnings` which handles runtime deduplication.
+            warnings.warn(f"Rerun is disabled - {func.__name__}() call ignored")
             return
 
         if rerun.strict_mode():
diff --git a/rerun_py/src/python_bridge.rs b/rerun_py/src/python_bridge.rs
index 1d56f8f596f3..a6af1513cf84 100644
--- a/rerun_py/src/python_bridge.rs
+++ b/rerun_py/src/python_bridge.rs
@@ -150,7 +150,9 @@ fn rerun_bindings(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
 }
 
 fn no_active_recording(origin: &str) {
-    re_log::debug!("No active recording - call to {origin}() ignored (have you called rr.init()?)",);
+    re_log::warn_once!(
+        "No active recording - call to {origin}() ignored (have you called rr.init()?)",
+    );
 }
 
 // --- Init ---

From b3f6a804104b0ee37ee0c19fbc8b332e72be3d38 Mon Sep 17 00:00:00 2001
From: Clement Rey
Date: Thu, 4 May 2023 23:04:20 +0200
Subject: [PATCH 31/31] not an issue anymore

---
 crates/re_sdk_comms/src/buffered_client.rs | 4 ----
 rerun_py/src/arrow.rs                      | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/crates/re_sdk_comms/src/buffered_client.rs b/crates/re_sdk_comms/src/buffered_client.rs
index a6063e884ddf..797a4a23b34e 100644
--- a/crates/re_sdk_comms/src/buffered_client.rs
+++ b/crates/re_sdk_comms/src/buffered_client.rs
@@ -202,10 +202,6 @@ fn msg_encode(
                 re_log::error!("Failed to send message to tcp_sender thread. Likely a shutdown race-condition.");
                 return;
             }
-            // TODO: this is incorrect and dangerous: flush() can return before this
-            // thread is done with its workload, which means the python process might be
-            // dead before this thread is dead, which means we call a C callback that has
-            // been dunload().
             if msg_drop_tx.send(msg_msg).is_err() {
                 re_log::error!("Failed to send message to msg_drop thread. Likely a shutdown race-condition");
                 return;
             }
diff --git a/rerun_py/src/arrow.rs b/rerun_py/src/arrow.rs
index c07a5bb08269..def3f5558683 100644
--- a/rerun_py/src/arrow.rs
+++ b/rerun_py/src/arrow.rs
@@ -9,7 +9,7 @@ use pyo3::{
     types::{IntoPyDict, PyString},
     PyAny, PyResult,
 };
-use re_log_types::{component_types, DataCell, DataRow, DataTable, EntityPath, RowId, TimePoint};
+use re_log_types::{component_types, DataCell, DataRow, EntityPath, RowId, TimePoint};
 
 /// Perform conversion from a pyarrow array to arrow2 types.
 ///