diff --git a/CITATION.cff b/CITATION.cff index e7f1a91..59c1423 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -5,7 +5,7 @@ authors: given-names: "Vince" orcid: "https://orcid.org/0000-0003-4510-1609" title: "SciDataFlow: A Tool for Improving the Flow of Data through Science" -version: 0.8.11 +version: 0.8.12 doi: http://dx.doi.org/10.1093/bioinformatics/btad754 date-released: 2024-01-05 url: "https://github.com/vsbuffalo/scidataflow/" diff --git a/Cargo.toml b/Cargo.toml index 486dac7..4ed7ae1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "scidataflow" -version = "0.8.11" +version = "0.8.12" edition = "2021" exclude = ["logo.png", "tests/test_data/**"] license = "MIT" diff --git a/src/lib/project.rs b/src/lib/project.rs index 3402f94..4f936ec 100644 --- a/src/lib/project.rs +++ b/src/lib/project.rs @@ -309,13 +309,26 @@ impl Project { self.save() } - pub async fn status(&mut self, include_remotes: bool, all: bool) -> Result<()> { + pub async fn status( + &mut self, + include_remotes: bool, + all: bool, + short: bool, + depth: Option, + ) -> Result<()> { // if include_remotes (e.g. --remotes) is set, we need to merge // in the remotes, so we authenticate first and then get them. let path_context = &canonicalize(self.path_context())?; let status_rows = self.data.status(path_context, include_remotes).await?; //let remotes: Option<_> = include_remotes.then(|| &self.data.remotes); - print_status(status_rows, Some(&self.data.remotes), all); + print_status( + status_rows, + Some(&self.data.remotes), + all, + short, + include_remotes, + depth, + ); Ok(()) } diff --git a/src/lib/utils.rs b/src/lib/utils.rs index ebf9384..51495dd 100644 --- a/src/lib/utils.rs +++ b/src/lib/utils.rs @@ -9,12 +9,16 @@ use std::collections::HashMap; use std::fs; use std::fs::File; use std::io::Read; +use std::ops::Add; use std::path::{Path, PathBuf}; use timeago::Formatter; use crate::lib::data::StatusEntry; use crate::lib::remote::Remote; +use super::data::LocalStatusCode; +use super::remote::RemoteStatusCode; + pub const ISSUE_URL: &str = "https://github.com/vsbuffalo/scidataflow/issues"; pub fn load_file(path: &PathBuf) -> String { @@ -76,73 +80,113 @@ pub async fn compute_md5(file_path: &Path) -> Result> { let result = md5.compute(); Ok(Some(format!("{:x}", result))) } -/* - pub fn print_fixed_width(rows: HashMap>, nspaces: Option, indent: Option, color: bool) { - let indent = indent.unwrap_or(0); - let nspaces = nspaces.unwrap_or(6); - - let max_cols = rows.values() - .flat_map(|v| v.iter()) - .filter_map(|entry| { - match &entry.cols { - None => None, - Some(cols) => Some(cols.len()) - } - }) - .max() - .unwrap_or(0); - - let mut max_lengths = vec![0; max_cols]; - -// compute max lengths across all rows -for entry in rows.values().flat_map(|v| v.iter()) { -if let Some(cols) = &entry.cols { -for (i, col) in cols.iter().enumerate() { -max_lengths[i] = max_lengths[i].max(col.width()); -} -} -} -// print status table -let mut keys: Vec<&String> = rows.keys().collect(); -keys.sort(); -for (key, value) in &rows { -let pretty_key = if color { key.bold().to_string() } else { key.clone() }; -println!("[{}]", pretty_key); - -// Print the rows with the correct widths -for row in value { -let mut fixed_row = Vec::new(); -let tracked = &row.tracked; -let local_status = &row.local_status; -let remote_status = &row.remote_status; -if let Some(cols) = &row.cols { -for (i, col) in cols.iter().enumerate() { -// push a fixed-width column to vector -let fixed_col = format!("{:width$}", col, width = max_lengths[i]); -fixed_row.push(fixed_col); -} -} -let spacer = " ".repeat(nspaces); -let status_line = fixed_row.join(&spacer); -println!("{}{}", " ".repeat(indent), status_line); -} -println!(); + +/// Get the directory at the specified depth from a path string +fn get_dir_at_depth(dir: &str, filename: &str, depth: usize) -> String { + // Combine directory and filename into a full path + let full_path = if dir.is_empty() { + Path::new(filename).to_path_buf() + } else { + Path::new(dir).join(filename).to_path_buf() + }; + + // Get the parent directory of the full path + let parent_path = full_path.parent().unwrap_or(Path::new(".")); + + // Split the parent path into components + let components: Vec<_> = parent_path.components().collect(); + + if depth == 0 || components.is_empty() { + return ".".to_string(); + } + + // Take components up to the specified depth + let depth_path: PathBuf = components + .iter() + .take(depth.min(components.len())) + .collect(); + + if depth_path.as_os_str().is_empty() { + ".".to_string() + } else { + depth_path.to_string_lossy().to_string() + } } + +pub fn print_fixed_width_status_short( + rows: BTreeMap>, + color: bool, + all: bool, + short: bool, + depth: Option, + has_remote_info: bool, +) { + // If depth is provided, reorganize the data based on the specified depth + let grouped_rows: BTreeMap> = if let Some(depth) = depth { + let mut depth_grouped: BTreeMap> = BTreeMap::new(); + for (dir_entry, entries) in rows { + for entry in entries { + let base_dir = get_dir_at_depth(&dir_entry.path, &entry.name, depth); + depth_grouped + .entry(DirectoryEntry { + path: base_dir, + remote_name: None, + }) + .or_insert_with(Vec::new) + .push(entry); + } + } + depth_grouped + } else { + rows + }; + // dbg!(&grouped_rows); + + // Print status table + let mut dir_keys: Vec<&DirectoryEntry> = grouped_rows.keys().collect(); + dir_keys.sort(); + + for key in dir_keys { + let mut statuses = grouped_rows[key] + .iter() + .filter(|status| !(status.local_status.is_none() && !all)) + .cloned() + .collect::>(); + + if statuses.is_empty() { + continue; + } + + // Sort the statuses by filename + statuses.sort_by(|a, b| a.name.cmp(&b.name)); + + let display_key = if key.path.is_empty() { + ".".to_string() + } else { + key.display().to_string() + }; + let prettier_key = if color { + display_key.bold().to_string() + } else { + display_key.to_string() + }; + println!("[{}]", prettier_key); + let file_counts = + get_counts(&statuses, has_remote_info).expect("Internal error: get_counts()."); + file_counts.pretty_print(short); + println!(); + } } -*/ -// More specialized version of print_fixed_width() for statuses. -// Handles coloring, manual annotation, etc + pub fn print_fixed_width_status( - rows: BTreeMap>, + rows: BTreeMap>, nspaces: Option, indent: Option, color: bool, all: bool, ) { - //debug!("rows: {:?}", rows); let indent = indent.unwrap_or(0); let nspaces = nspaces.unwrap_or(6); - let abbrev = Some(8); // get the max number of columns (in case ragged) @@ -159,20 +203,28 @@ pub fn print_fixed_width_status( for status in rows.values().flat_map(|v| v.iter()) { let cols = status.columns(abbrev); for (i, col) in cols.iter().enumerate() { - max_lengths[i] = max_lengths[i].max(col.len()); // Assuming col is a string + max_lengths[i] = max_lengths[i].max(col.len()); } } // print status table - let mut dir_keys: Vec<&String> = rows.keys().collect(); + let mut dir_keys: Vec<&DirectoryEntry> = rows.keys().collect(); dir_keys.sort(); + for key in dir_keys { - let statuses = &rows[key]; - let pretty_key = if key.is_empty() { "." } else { key }; + let mut statuses = rows[key].clone(); + // Sort by filename + statuses.sort_by(|a, b| a.name.cmp(&b.name)); + + let display_key = if key.path.is_empty() { + ".".to_string() + } else { + key.display().to_string() + }; let prettier_key = if color { - pretty_key.bold().to_string() + display_key.bold().to_string() } else { - pretty_key.to_string() + display_key.to_string() }; println!("[{}]", prettier_key); @@ -230,60 +282,145 @@ pub fn pluralize>(count: T, noun: &str) -> String { } } +#[derive(Debug, Default)] struct FileCounts { local: u64, + local_current: u64, + local_modified: u64, remote: u64, both: u64, total: u64, - #[allow(dead_code)] messy: u64, } -fn get_counts(rows: &BTreeMap>) -> Result { +impl FileCounts { + pub fn pretty_print(&self, short: bool) { + if short { + // Only show categories that have files + let mut parts = Vec::new(); + if self.local > 0 { + if self.local_modified > 0 { + parts.push(format!( + "{} local ({} modified)", + self.local.to_string().green(), + self.local_modified.to_string().red() + )); + } else { + parts.push(format!("{} local", self.local.to_string().green())); + } + } + if self.remote > 0 { + parts.push(format!("{} remote-only", self.remote.to_string().yellow())); + } + if self.both > 0 { + parts.push(format!("{} synced", self.both.to_string().cyan())); + } + if self.messy > 0 { + parts.push(format!("{} messy", self.messy.to_string().red())); + } + + if parts.is_empty() { + println!("no files"); + } else { + println!( + "{} ({})", + parts.join(", "), + format!("total: {}", self.total).bold() + ); + } + } else { + println!("{}", format!(" {} files total", self.total).bold()); + if self.both > 0 { + println!(" ✓ {} synced with remote", self.both.to_string().cyan()); + } + if self.local > 0 { + let status = if self.local_modified > 0 { + format!( + " ({} current, {} modified)", + self.local_current.to_string().green(), + self.local_modified.to_string().red() + ) + } else { + format!(" (all current)") + }; + println!( + " + {} local only{}", + self.local.to_string().green(), + status + ); + } + if self.remote > 0 { + println!(" - {} remote only", self.remote.to_string().yellow()); + } + if self.messy > 0 { + println!(" ! {} messy", self.messy.to_string().red()); + } + } + } +} + +fn get_counts(files: &Vec, has_remote_info: bool) -> Result { let mut local = 0; + let mut local_current = 0; + let mut local_modified = 0; let mut remote = 0; let mut both = 0; let mut total = 0; let mut messy = 0; - for files in rows.values() { - for file in files { - total += 1; - match (&file.local_status, &file.remote_status, &file.tracked) { - (None, None, _) => { - return Err(anyhow!("Internal Error: get_counts found a file with both local/remote set to None.")); - } - (None, Some(_), None) => { - remote += 1; - } - (Some(_), None, Some(false)) => { - local += 1; - } - (Some(_), None, None) => { - local += 1; - } - (None, Some(_), Some(true)) => { - remote += 1; - } - (None, Some(_), Some(false)) => { - local += 1; - } - (Some(_), Some(_), Some(true)) => { - both += 1; - } - (Some(_), Some(_), Some(false)) => { - messy += 1; - } - (Some(_), None, Some(true)) => { - remote += 1; + + for file in files { + total += 1; + + if !has_remote_info { + // When we don't have remote info, everything local is just local + if let Some(status) = &file.local_status { + local += 1; + match status { + LocalStatusCode::Current => local_current += 1, + LocalStatusCode::Modified => local_modified += 1, + _ => messy += 1, } - (Some(_), Some(_), None) => { - messy += 1; + } + continue; + } + + match (&file.local_status, &file.remote_status, &file.tracked) { + (None, None, _) => { + return Err(anyhow!( + "Internal Error: get_counts found a file with both local/remote set to None." + )); + } + // Local files (including those with NotExists remote status) + (Some(local_status), Some(RemoteStatusCode::NotExists), _) + | (Some(local_status), None, Some(false)) + | (Some(local_status), None, None) => { + local += 1; + match local_status { + LocalStatusCode::Current => local_current += 1, + LocalStatusCode::Modified => local_modified += 1, + _ => messy += 1, } } + // Files that exist both locally and remotely + (Some(_), Some(_), Some(true)) + if matches!(file.remote_status, Some(RemoteStatusCode::Current)) => + { + both += 1; + } + // Remote only files + (None, Some(_), _) => { + remote += 1; + } + // Everything else is messy + _ => { + messy += 1; + } } } Ok(FileCounts { local, + local_current, + local_modified, remote, both, total, @@ -291,41 +428,114 @@ fn get_counts(rows: &BTreeMap>) -> Result { }) } +impl Add for FileCounts { + type Output = FileCounts; + + fn add(self, other: FileCounts) -> FileCounts { + FileCounts { + local: self.local + other.local, + local_current: self.local_current + other.local_current, + local_modified: self.local_modified + other.local_modified, + remote: self.remote + other.remote, + both: self.both + other.both, + total: self.total + other.total, + messy: self.messy + other.messy, + } + } +} + +fn get_counts_tree( + rows: &BTreeMap>, + has_remote_info: bool, +) -> Result { + let mut counts = FileCounts::default(); + for files in rows.values() { + counts = counts + get_counts(files, has_remote_info)?; + } + Ok(counts) +} + +#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)] +pub struct DirectoryEntry { + path: String, + remote_name: Option, +} + +impl DirectoryEntry { + fn display(&self) -> String { + if let Some(remote) = &self.remote_name { + format!("{} > {}", self.path, remote) + } else { + self.path.clone() + } + } +} + pub fn print_status( rows: BTreeMap>, remote: Option<&HashMap>, all: bool, + short: bool, + has_remote_info: bool, + depth: Option, ) { println!("{}", "Project data status:".bold()); - let counts = get_counts(&rows).expect("Internal Error: get_counts() panicked."); - println!( - "{} local and tracked by a remote ({} only local, {} only remote), {} total.\n", - pluralize(counts.both, "file"), - pluralize(counts.local, "file"), - pluralize(counts.remote, "file"), - //pluralize(counts.messy as u64, "file"), - pluralize(counts.total, "file") - ); - - // this brings the remote name (if there is a corresponding remote) into - // the key, so the linked remote can be displayed in the status - let rows_by_dir: BTreeMap> = match remote { + + // Pass the remote info state to get_counts + let counts = + get_counts_tree(&rows, has_remote_info).expect("Internal Error: get_counts() panicked."); + + // Adjust the status message based on whether we have remote info + if has_remote_info { + println!( + "{} local and tracked by a remote ({} only local, {} only remote), {} total.\n", + pluralize(counts.both, "file"), + pluralize(counts.local, "file"), + pluralize(counts.remote, "file"), + pluralize(counts.total, "file") + ); + } else { + println!("{} local files total.\n", pluralize(counts.total, "file")); + } + + let rows_by_dir: BTreeMap> = match remote { Some(remote_map) => { let mut new_map = BTreeMap::new(); for (directory, statuses) in rows { - if let Some(remote) = remote_map.get(&directory) { - let new_key = format!("{} > {}", directory, remote.name()); - new_map.insert(new_key, statuses); + let entry = if let Some(remote) = remote_map.get(&directory) { + DirectoryEntry { + path: directory, + remote_name: Some(remote.name().to_string()), + } } else { - new_map.insert(directory, statuses); - } + DirectoryEntry { + path: directory, + remote_name: None, + } + }; + new_map.insert(entry, statuses); } new_map } - None => rows, + None => rows + .into_iter() + .map(|(dir, statuses)| { + ( + DirectoryEntry { + path: dir, + remote_name: None, + }, + statuses, + ) + }) + .collect(), }; - print_fixed_width_status(rows_by_dir, None, None, true, all); + if depth.is_some() { + print_fixed_width_status_short(rows_by_dir, true, all, short, depth, has_remote_info) + } else { + print_fixed_width_status(rows_by_dir, None, None, true, all); + } } pub fn format_bytes(size: u64) -> String { diff --git a/src/main.rs b/src/main.rs index 706ad93..22fefe4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -130,6 +130,14 @@ enum Commands { /// Show statuses of all files, including those on remote(s) but not in the manifest. #[arg(long)] all: bool, + + /// A more terse summary, with --depth 2. + #[arg(long)] + short: bool, + + /// Depth to summarize over. + #[arg(long)] + depth: Option, }, /// Show file size statistics. Stats {}, @@ -289,9 +297,15 @@ async fn run() -> Result<()> { proj.bulk(filename, *column, *header, *overwrite).await } Some(Commands::Init { name }) => Project::init(name.clone()), - Some(Commands::Status { remotes, all }) => { + Some(Commands::Status { + remotes, + all, + short, + depth, + }) => { let mut proj = Project::new()?; - proj.status(*remotes, *all).await + let depth = if *short { Some(2) } else { *depth }; + proj.status(*remotes, *all, *short, depth).await } Some(Commands::Stats {}) => { //let proj = Project::new()?;