From 62ab5c19e21a6905f4532737c5e1a3b743a7633e Mon Sep 17 00:00:00 2001 From: Joe Prosser Date: Wed, 24 Jul 2024 14:07:19 +0100 Subject: [PATCH 1/6] feat(commands): add lossy flag when creating annotations --- cli/src/commands/create/annotations.rs | 23 +++++++++++++++++++++-- cli/src/commands/create/comments.rs | 9 +++++++++ cli/src/main.rs | 19 +++++++++++++++---- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/cli/src/commands/create/annotations.rs b/cli/src/commands/create/annotations.rs index 76722307..441761e4 100644 --- a/cli/src/commands/create/annotations.rs +++ b/cli/src/commands/create/annotations.rs @@ -1,4 +1,7 @@ -use crate::progress::{Options as ProgressOptions, Progress}; +use crate::{ + print_error_as_warning, + progress::{Options as ProgressOptions, Progress}, +}; use anyhow::{Context, Result}; use colored::Colorize; use log::info; @@ -47,6 +50,10 @@ pub struct CreateAnnotationsArgs { #[structopt(long = "batch-size", default_value = "128")] /// Number of comments to batch in a single request. batch_size: usize, + + #[structopt(long)] + /// Whether to attempt to resume processing on error + lossy: bool, } pub fn create(client: &Client, args: &CreateAnnotationsArgs, pool: &mut Pool) -> Result<()> { @@ -95,6 +102,7 @@ pub fn create(client: &Client, args: &CreateAnnotationsArgs, pool: &mut Pool) -> args.use_moon_forms, args.batch_size, pool, + args.lossy, )?; if let Some(mut progress) = progress { progress.done(); @@ -117,6 +125,7 @@ pub fn create(client: &Client, args: &CreateAnnotationsArgs, pool: &mut Pool) -> args.use_moon_forms, args.batch_size, pool, + args.lossy, )?; statistics } @@ -133,6 +142,7 @@ pub trait AnnotationStatistic { fn add_annotation(&self); } +#[allow(clippy::too_many_arguments)] pub fn upload_batch_of_annotations( annotations_to_upload: &mut Vec, client: &Client, @@ -141,6 +151,7 @@ pub fn upload_batch_of_annotations( dataset_name: &DatasetFullName, use_moon_forms: bool, pool: &mut Pool, + lossy: bool, ) -> Result<()> { let (error_sender, error_receiver) = channel(); @@ -190,7 +201,12 @@ pub fn upload_batch_of_annotations( }); if let Ok(error) = error_receiver.try_recv() { - Err(error) + if lossy { + print_error_as_warning(&error); + Ok(()) + } else { + Err(error) + } } else { annotations_to_upload.clear(); Ok(()) @@ -207,6 +223,7 @@ fn upload_annotations_from_reader( use_moon_forms: bool, batch_size: usize, pool: &mut Pool, + lossy: bool, ) -> Result<()> { let mut annotations_to_upload = Vec::new(); @@ -224,6 +241,7 @@ fn upload_annotations_from_reader( dataset_name, use_moon_forms, pool, + lossy, )?; } } @@ -238,6 +256,7 @@ fn upload_annotations_from_reader( dataset_name, use_moon_forms, pool, + lossy, )?; } diff --git a/cli/src/commands/create/comments.rs b/cli/src/commands/create/comments.rs index e28624f1..958237ab 100644 --- a/cli/src/commands/create/comments.rs +++ b/cli/src/commands/create/comments.rs @@ -74,6 +74,10 @@ pub struct CreateCommentsArgs { #[structopt(short = "y", long = "yes")] /// Consent to ai unit charge. Suppresses confirmation prompt. yes: bool, + + #[structopt(long)] + /// Whether to attempt to resume processing on error + lossy: bool, } pub fn create(client: &Client, args: &CreateCommentsArgs, pool: &mut Pool) -> Result<()> { @@ -152,6 +156,7 @@ pub fn create(client: &Client, args: &CreateCommentsArgs, pool: &mut Pool) -> Re args.use_moon_forms, args.no_charge, pool, + args.lossy, )?; if let Some(mut progress) = progress { progress.done(); @@ -180,6 +185,7 @@ pub fn create(client: &Client, args: &CreateCommentsArgs, pool: &mut Pool) -> Re args.use_moon_forms, args.no_charge, pool, + args.lossy, )?; statistics } @@ -330,6 +336,7 @@ fn upload_comments_from_reader( use_moon_forms: bool, no_charge: bool, pool: &mut Pool, + lossy: bool, ) -> Result<()> { assert!(batch_size > 0); @@ -415,6 +422,7 @@ fn upload_comments_from_reader( dataset_name, use_moon_forms, pool, + lossy, )?; } } @@ -442,6 +450,7 @@ fn upload_comments_from_reader( dataset_name, use_moon_forms, pool, + lossy, )?; } } diff --git a/cli/src/main.rs b/cli/src/main.rs index 3d4e79f4..d504e5c2 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -203,15 +203,26 @@ fn find_configuration(args: &Args) -> Result { Ok(config_path) } +pub fn print_error_as_warning(error: &anyhow::Error) { + warn!("An error occurred. Resuming...:"); + for cause in error.chain() { + warn!(" |- {}", cause); + } +} + +pub fn print_error(error: &anyhow::Error) { + error!("An error occurred:"); + for cause in error.chain() { + error!(" |- {}", cause); + } +} + fn main() { let args = Args::from_args(); utils::init_env_logger(args.verbose); if let Err(error) = run(args) { - error!("An error occurred:"); - for cause in error.chain() { - error!(" |- {}", cause); - } + print_error(&error); #[cfg(feature = "backtrace")] { From 4345cef90cddab90a9fecad2c92cd4562ccb5ccd Mon Sep 17 00:00:00 2001 From: Joe Prosser Date: Wed, 24 Jul 2024 14:29:30 +0100 Subject: [PATCH 2/6] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 21339d0a..dcab0f72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ - Add `config parse-from-url` command for parsing configuration from a URL - Add ability to download attachments for comments - Increase default http timeout to 120s +- Add `--lossy` flag when creating annotations # v0.28.0 - Add general fields to `create datasets` From 4198ccf90258b9e9e60c207ada7a2ec4dcd64bdd Mon Sep 17 00:00:00 2001 From: Joe Prosser Date: Wed, 24 Jul 2024 14:36:01 +0100 Subject: [PATCH 3/6] tweak resuming message --- cli/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index d504e5c2..be62b280 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -204,7 +204,7 @@ fn find_configuration(args: &Args) -> Result { } pub fn print_error_as_warning(error: &anyhow::Error) { - warn!("An error occurred. Resuming...:"); + warn!("An error occurred. Resuming..."); for cause in error.chain() { warn!(" |- {}", cause); } From 9c5e67ace0d62cb1ec46d9f2e4d42cae5848b073 Mon Sep 17 00:00:00 2001 From: Joe Prosser Date: Wed, 24 Jul 2024 14:54:09 +0100 Subject: [PATCH 4/6] add progress bar --- cli/src/commands/create/annotations.rs | 28 +++++++++++++++++++++++--- cli/src/commands/create/comments.rs | 26 ++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/cli/src/commands/create/annotations.rs b/cli/src/commands/create/annotations.rs index 441761e4..abbcab31 100644 --- a/cli/src/commands/create/annotations.rs +++ b/cli/src/commands/create/annotations.rs @@ -140,6 +140,7 @@ pub fn create(client: &Client, args: &CreateAnnotationsArgs, pool: &mut Pool) -> pub trait AnnotationStatistic { fn add_annotation(&self); + fn add_failed_annotation(&self); } #[allow(clippy::too_many_arguments)] @@ -193,9 +194,10 @@ pub fn upload_batch_of_annotations( if let Err(error) = result { error_sender.send(error).expect("Could not send error"); + statistics.add_failed_annotation(); + } else { + statistics.add_annotation(); } - - statistics.add_annotation(); }); }) }); @@ -326,12 +328,16 @@ fn read_annotations_iter<'a>( pub struct Statistics { bytes_read: AtomicUsize, annotations: AtomicUsize, + failed_annotations: AtomicUsize, } impl AnnotationStatistic for Statistics { fn add_annotation(&self) { self.annotations.fetch_add(1, Ordering::SeqCst); } + fn add_failed_annotation(&self) { + self.failed_annotations.fetch_add(1, Ordering::SeqCst); + } } impl Statistics { @@ -339,6 +345,7 @@ impl Statistics { Self { bytes_read: AtomicUsize::new(0), annotations: AtomicUsize::new(0), + failed_annotations: AtomicUsize::new(0), } } @@ -356,14 +363,29 @@ impl Statistics { pub fn num_annotations(&self) -> usize { self.annotations.load(Ordering::SeqCst) } + + #[inline] + pub fn num_failed_annotations(&self) -> usize { + self.failed_annotations.load(Ordering::SeqCst) + } } fn basic_statistics(statistics: &Statistics) -> (u64, String) { let bytes_read = statistics.bytes_read(); let num_annotations = statistics.num_annotations(); + let num_failed_annotations = statistics.num_failed_annotations(); ( bytes_read as u64, - format!("{} {}", num_annotations, "annotations".dimmed(),), + format!( + "{} {}{}", + num_annotations, + "annotations".dimmed(), + if num_failed_annotations > 0 { + format!(" {} {}", num_failed_annotations, "skipped".dimmed()) + } else { + "".to_string() + } + ), ) } diff --git a/cli/src/commands/create/comments.rs b/cli/src/commands/create/comments.rs index 958237ab..052b258c 100644 --- a/cli/src/commands/create/comments.rs +++ b/cli/src/commands/create/comments.rs @@ -473,12 +473,16 @@ pub struct Statistics { updated: AtomicUsize, unchanged: AtomicUsize, annotations: AtomicUsize, + failed_annotations: AtomicUsize, } impl AnnotationStatistic for Statistics { fn add_annotation(&self) { self.annotations.fetch_add(1, Ordering::SeqCst); } + fn add_failed_annotation(&self) { + self.failed_annotations.fetch_add(1, Ordering::SeqCst); + } } impl Statistics { @@ -490,6 +494,7 @@ impl Statistics { updated: AtomicUsize::new(0), unchanged: AtomicUsize::new(0), annotations: AtomicUsize::new(0), + failed_annotations: AtomicUsize::new(0), } } @@ -535,6 +540,11 @@ impl Statistics { fn num_annotations(&self) -> usize { self.annotations.load(Ordering::SeqCst) } + + #[inline] + fn num_failed_annotations(&self) -> usize { + self.failed_annotations.load(Ordering::SeqCst) + } } /// Detailed statistics - only make sense if using --overwrite (i.e. exclusively sync endpoint) @@ -546,10 +556,11 @@ fn detailed_statistics(statistics: &Statistics) -> (u64, String) { let num_updated = statistics.num_updated(); let num_unchanged = statistics.num_unchanged(); let num_annotations = statistics.num_annotations(); + let num_failed_annotations = statistics.num_failed_annotations(); ( bytes_read as u64, format!( - "{} {}: {} {} {} {} {} {} [{} {}]", + "{} {}: {} {} {} {} {} {} [{} {}{}]", num_uploaded.to_string().bold(), "comments".dimmed(), num_new, @@ -560,6 +571,11 @@ fn detailed_statistics(statistics: &Statistics) -> (u64, String) { "nop".dimmed(), num_annotations, "annotations".dimmed(), + if num_failed_annotations > 0 { + format!(" {} {}", num_failed_annotations, "skipped".dimmed()) + } else { + "".to_string() + } ), ) } @@ -569,14 +585,20 @@ fn basic_statistics(statistics: &Statistics) -> (u64, String) { let bytes_read = statistics.bytes_read(); let num_uploaded = statistics.num_uploaded(); let num_annotations = statistics.num_annotations(); + let num_failed_annotations = statistics.num_failed_annotations(); ( bytes_read as u64, format!( - "{} {} [{} {}]", + "{} {} [{} {}{}]", num_uploaded.to_string().bold(), "comments".dimmed(), num_annotations, "annotations".dimmed(), + if num_failed_annotations > 0 { + format!(" {} {}", num_failed_annotations, "skipped".dimmed()) + } else { + "".to_string() + } ), ) } From 9bb5df2968f00d5dd14801ce439a8a6f8fb9f969 Mon Sep 17 00:00:00 2001 From: Joe Prosser Date: Wed, 24 Jul 2024 15:29:31 +0100 Subject: [PATCH 5/6] post review fix up --- CHANGELOG.md | 2 +- cli/src/commands/create/annotations.rs | 37 ++++++++++++------------- cli/src/commands/create/comments.rs | 38 ++++++++++++++------------ cli/src/main.rs | 19 +++---------- 4 files changed, 44 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dcab0f72..1e0a0669 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ - Add `config parse-from-url` command for parsing configuration from a URL - Add ability to download attachments for comments - Increase default http timeout to 120s -- Add `--lossy` flag when creating annotations +- Add `--resume-on-error` flag when creating annotations # v0.28.0 - Add general fields to `create datasets` diff --git a/cli/src/commands/create/annotations.rs b/cli/src/commands/create/annotations.rs index abbcab31..57e4d406 100644 --- a/cli/src/commands/create/annotations.rs +++ b/cli/src/commands/create/annotations.rs @@ -1,7 +1,4 @@ -use crate::{ - print_error_as_warning, - progress::{Options as ProgressOptions, Progress}, -}; +use crate::progress::{Options as ProgressOptions, Progress}; use anyhow::{Context, Result}; use colored::Colorize; use log::info; @@ -51,9 +48,9 @@ pub struct CreateAnnotationsArgs { /// Number of comments to batch in a single request. batch_size: usize, - #[structopt(long)] + #[structopt(long = "resume-on-error")] /// Whether to attempt to resume processing on error - lossy: bool, + resume_on_error: bool, } pub fn create(client: &Client, args: &CreateAnnotationsArgs, pool: &mut Pool) -> Result<()> { @@ -102,7 +99,7 @@ pub fn create(client: &Client, args: &CreateAnnotationsArgs, pool: &mut Pool) -> args.use_moon_forms, args.batch_size, pool, - args.lossy, + args.resume_on_error, )?; if let Some(mut progress) = progress { progress.done(); @@ -125,7 +122,7 @@ pub fn create(client: &Client, args: &CreateAnnotationsArgs, pool: &mut Pool) -> args.use_moon_forms, args.batch_size, pool, - args.lossy, + args.resume_on_error, )?; statistics } @@ -152,7 +149,7 @@ pub fn upload_batch_of_annotations( dataset_name: &DatasetFullName, use_moon_forms: bool, pool: &mut Pool, - lossy: bool, + resume_on_error: bool, ) -> Result<()> { let (error_sender, error_receiver) = channel(); @@ -203,8 +200,7 @@ pub fn upload_batch_of_annotations( }); if let Ok(error) = error_receiver.try_recv() { - if lossy { - print_error_as_warning(&error); + if resume_on_error { Ok(()) } else { Err(error) @@ -225,7 +221,7 @@ fn upload_annotations_from_reader( use_moon_forms: bool, batch_size: usize, pool: &mut Pool, - lossy: bool, + resume_on_error: bool, ) -> Result<()> { let mut annotations_to_upload = Vec::new(); @@ -243,7 +239,7 @@ fn upload_annotations_from_reader( dataset_name, use_moon_forms, pool, - lossy, + resume_on_error, )?; } } @@ -258,7 +254,7 @@ fn upload_annotations_from_reader( dataset_name, use_moon_forms, pool, - lossy, + resume_on_error, )?; } @@ -374,17 +370,20 @@ fn basic_statistics(statistics: &Statistics) -> (u64, String) { let bytes_read = statistics.bytes_read(); let num_annotations = statistics.num_annotations(); let num_failed_annotations = statistics.num_failed_annotations(); + + let failed_annotations_string = if num_failed_annotations > 0 { + format!(" {num_failed_annotations} {}", "skipped".dimmed()) + } else { + String::new() + }; + ( bytes_read as u64, format!( "{} {}{}", num_annotations, "annotations".dimmed(), - if num_failed_annotations > 0 { - format!(" {} {}", num_failed_annotations, "skipped".dimmed()) - } else { - "".to_string() - } + failed_annotations_string ), ) } diff --git a/cli/src/commands/create/comments.rs b/cli/src/commands/create/comments.rs index 052b258c..e18a7c38 100644 --- a/cli/src/commands/create/comments.rs +++ b/cli/src/commands/create/comments.rs @@ -75,9 +75,9 @@ pub struct CreateCommentsArgs { /// Consent to ai unit charge. Suppresses confirmation prompt. yes: bool, - #[structopt(long)] + #[structopt(long = "resume-on-error")] /// Whether to attempt to resume processing on error - lossy: bool, + resume_on_error: bool, } pub fn create(client: &Client, args: &CreateCommentsArgs, pool: &mut Pool) -> Result<()> { @@ -156,7 +156,7 @@ pub fn create(client: &Client, args: &CreateCommentsArgs, pool: &mut Pool) -> Re args.use_moon_forms, args.no_charge, pool, - args.lossy, + args.resume_on_error, )?; if let Some(mut progress) = progress { progress.done(); @@ -185,7 +185,7 @@ pub fn create(client: &Client, args: &CreateCommentsArgs, pool: &mut Pool) -> Re args.use_moon_forms, args.no_charge, pool, - args.lossy, + args.resume_on_error, )?; statistics } @@ -336,7 +336,7 @@ fn upload_comments_from_reader( use_moon_forms: bool, no_charge: bool, pool: &mut Pool, - lossy: bool, + resume_on_error: bool, ) -> Result<()> { assert!(batch_size > 0); @@ -422,7 +422,7 @@ fn upload_comments_from_reader( dataset_name, use_moon_forms, pool, - lossy, + resume_on_error, )?; } } @@ -450,7 +450,7 @@ fn upload_comments_from_reader( dataset_name, use_moon_forms, pool, - lossy, + resume_on_error, )?; } } @@ -557,6 +557,12 @@ fn detailed_statistics(statistics: &Statistics) -> (u64, String) { let num_unchanged = statistics.num_unchanged(); let num_annotations = statistics.num_annotations(); let num_failed_annotations = statistics.num_failed_annotations(); + let failed_annotations_string = if num_failed_annotations > 0 { + format!(" {num_failed_annotations} {}", "skipped".dimmed()) + } else { + String::new() + }; + ( bytes_read as u64, format!( @@ -571,11 +577,7 @@ fn detailed_statistics(statistics: &Statistics) -> (u64, String) { "nop".dimmed(), num_annotations, "annotations".dimmed(), - if num_failed_annotations > 0 { - format!(" {} {}", num_failed_annotations, "skipped".dimmed()) - } else { - "".to_string() - } + failed_annotations_string ), ) } @@ -586,6 +588,12 @@ fn basic_statistics(statistics: &Statistics) -> (u64, String) { let num_uploaded = statistics.num_uploaded(); let num_annotations = statistics.num_annotations(); let num_failed_annotations = statistics.num_failed_annotations(); + let failed_annotations_string = if num_failed_annotations > 0 { + format!(" {num_failed_annotations} {}", "skipped".dimmed()) + } else { + String::new() + }; + ( bytes_read as u64, format!( @@ -594,11 +602,7 @@ fn basic_statistics(statistics: &Statistics) -> (u64, String) { "comments".dimmed(), num_annotations, "annotations".dimmed(), - if num_failed_annotations > 0 { - format!(" {} {}", num_failed_annotations, "skipped".dimmed()) - } else { - "".to_string() - } + failed_annotations_string ), ) } diff --git a/cli/src/main.rs b/cli/src/main.rs index be62b280..3d4e79f4 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -203,26 +203,15 @@ fn find_configuration(args: &Args) -> Result { Ok(config_path) } -pub fn print_error_as_warning(error: &anyhow::Error) { - warn!("An error occurred. Resuming..."); - for cause in error.chain() { - warn!(" |- {}", cause); - } -} - -pub fn print_error(error: &anyhow::Error) { - error!("An error occurred:"); - for cause in error.chain() { - error!(" |- {}", cause); - } -} - fn main() { let args = Args::from_args(); utils::init_env_logger(args.verbose); if let Err(error) = run(args) { - print_error(&error); + error!("An error occurred:"); + for cause in error.chain() { + error!(" |- {}", cause); + } #[cfg(feature = "backtrace")] { From 8acf87980e4d7db4124f59ddb46176dba372b3eb Mon Sep 17 00:00:00 2001 From: Joe Prosser Date: Wed, 24 Jul 2024 15:36:34 +0100 Subject: [PATCH 6/6] clear annotations to upload when resuming --- cli/src/commands/create/annotations.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/cli/src/commands/create/annotations.rs b/cli/src/commands/create/annotations.rs index 57e4d406..285dbb27 100644 --- a/cli/src/commands/create/annotations.rs +++ b/cli/src/commands/create/annotations.rs @@ -201,6 +201,7 @@ pub fn upload_batch_of_annotations( if let Ok(error) = error_receiver.try_recv() { if resume_on_error { + annotations_to_upload.clear(); Ok(()) } else { Err(error)