Skip to content

Commit

Permalink
feat(cli): add ability to sample comments
Browse files Browse the repository at this point in the history
  • Loading branch information
joe-prosser committed Aug 5, 2024
1 parent ebf1482 commit d44a9f3
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
- Retry when putting comments
- Add ability to get email by id
- Add ability to upload attachment content for comments
- Add ability to randomly sample with `get comments`

# v0.29.0
- Add `config parse-from-url` command for parsing configuration from a URL
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions api/src/resources/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ pub struct StatisticsRequestParams {
/// Ordering variants that can be supplied as the `order` of a query.
///
/// NOTE(review): the attributes applied to this enum are declared above this
/// point; confirm the serialized representation against them before relying
/// on any particular wire format.
pub enum OrderEnum {
/// Order results with respect to the named label.
/// NOTE(review): the exact ranking semantics are not visible here — confirm.
ByLabel { label: String },
/// Presumably orders results most-recent-first — TODO confirm.
Recent,
/// Sampled ordering parameterised by `seed`.
/// NOTE(review): presumably the same seed reproduces the same ordering —
/// confirm against the API documentation.
Sample { seed: usize },
}

#[derive(Debug, Clone, Serialize)]
Expand Down
1 change: 1 addition & 0 deletions cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ encoding_rs = "0.8.33"
ordered-float = { version = "3.9.1", features = ["serde"] }
mailparse = "0.14.0"
diff = "0.1.13"
rand = "0.8.5"

[dev-dependencies]
pretty_assertions = "1.3.0"
Expand Down
20 changes: 19 additions & 1 deletion cli/src/commands/get/comments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use chrono::{DateTime, Utc};
use colored::Colorize;
use dialoguer::{Confirm, Input, Select};
use log::info;
use rand::Rng;
use regex::Regex;
use reinfer_client::{
resources::{
Expand Down Expand Up @@ -126,6 +127,10 @@ pub struct GetManyCommentsArgs {
#[structopt(long = "--only-with-attachments")]
/// Whether to only return comments with attachment metadata
only_with_attachments: Option<bool>,

#[structopt(long = "--shuffle")]
/// Whether to return comments in a random order
shuffle: Option<bool>,
}

#[derive(Debug, Deserialize)]
Expand Down Expand Up @@ -309,6 +314,7 @@ pub fn get_many(client: &Client, args: &GetManyCommentsArgs) -> Result<()> {
senders,
include_attachment_content,
only_with_attachments,
shuffle,
} = args;

let by_timerange = from_timestamp.is_some() || to_timestamp.is_some();
Expand Down Expand Up @@ -368,6 +374,10 @@ pub fn get_many(client: &Client, args: &GetManyCommentsArgs) -> Result<()> {
bail!("Cannot include attachment content when no file is provided")
}

if shuffle.is_some() && dataset.is_none() {
bail!("Cannot shuffle data when dataset is not provided")
}

let OutputLocations {
jsonl_file,
attachments_dir,
Expand Down Expand Up @@ -454,6 +464,7 @@ pub fn get_many(client: &Client, args: &GetManyCommentsArgs) -> Result<()> {
messages_filter: Some(messages_filter),
attachments_dir,
only_with_attachments_filter,
shuffle: shuffle.unwrap_or(false),
};

if let Some(file) = jsonl_file {
Expand Down Expand Up @@ -509,6 +520,7 @@ struct CommentDownloadOptions {
messages_filter: Option<MessagesFilter>,
attachments_dir: Option<PathBuf>,
only_with_attachments_filter: Option<AttributeFilter>,
shuffle: bool,
}

impl CommentDownloadOptions {
Expand Down Expand Up @@ -685,7 +697,13 @@ fn get_comments_from_uids(
messages: options.messages_filter.clone(),
},
limit: DEFAULT_QUERY_PAGE_SIZE,
order: OrderEnum::Recent,
order: if options.shuffle {
OrderEnum::Sample {
seed: rand::thread_rng().gen_range(0..2_i64.pow(31) - 1) as usize,
}
} else {
OrderEnum::Recent
},
};

client
Expand Down

0 comments on commit d44a9f3

Please sign in to comment.