diff --git a/CHANGELOG.md b/CHANGELOG.md index b1688053..af5ebb56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## Unreleased +- Add ability to get dataset stats - Show Global Permissions in `get users` - Upgrade `ordered-float` version, which is exposed in the public crate api. diff --git a/api/src/resources/dataset.rs b/api/src/resources/dataset.rs index b06c570b..a9ee4034 100644 --- a/api/src/resources/dataset.rs +++ b/api/src/resources/dataset.rs @@ -1,4 +1,5 @@ use chrono::{DateTime, Utc}; +use ordered_float::NotNan; use serde::{Deserialize, Serialize}; use crate::{ @@ -36,6 +37,18 @@ pub struct Dataset { pub label_groups: Vec, } +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +pub struct DatasetStats { + pub num_reviewed: NotNan, + pub total_verbatims: NotNan, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +pub struct DatasetAndStats { + pub dataset: Dataset, + pub stats: DatasetStats, +} + impl Dataset { pub fn full_name(&self) -> FullName { FullName(format!("{}/{}", self.owner.0, self.name.0)) diff --git a/cli/src/commands/get/datasets.rs b/cli/src/commands/get/datasets.rs index 13f5b0ef..b0109b45 100644 --- a/cli/src/commands/get/datasets.rs +++ b/cli/src/commands/get/datasets.rs @@ -1,5 +1,9 @@ use anyhow::{Context, Result}; -use reinfer_client::{Client, DatasetIdentifier}; +use log::info; +use reinfer_client::{ + resources::dataset::{DatasetAndStats, DatasetStats, StatisticsRequestParams}, + Client, CommentFilter, DatasetIdentifier, +}; use structopt::StructOpt; use crate::printer::Printer; @@ -9,10 +13,17 @@ pub struct GetDatasetsArgs { #[structopt(name = "dataset")] /// If specified, only list this dataset (name or id) dataset: Option, + + #[structopt(long = "stats")] + /// Whether to include dataset statistics in response + include_stats: bool, } pub fn get(client: &Client, args: &GetDatasetsArgs, printer: &Printer) -> Result<()> { - let GetDatasetsArgs { dataset } = args; + let GetDatasetsArgs { + dataset, + include_stats, + } = args; let datasets = if let Some(dataset) = dataset { vec![client .get_dataset(dataset.clone()) @@ -26,5 +37,45 @@ pub fn get(client: &Client, args: &GetDatasetsArgs, printer: &Printer) -> Result }); datasets }; - printer.print_resources(&datasets) + + let mut dataset_stats = Vec::new(); + if *include_stats { + datasets.iter().try_for_each(|dataset| -> Result<()> { + info!("Getting statistics for dataset {}", dataset.full_name().0); + let unfiltered_stats = client + .get_dataset_statistics( + &dataset.full_name(), + &StatisticsRequestParams { + ..Default::default() + }, + ) + .context("Could not get statistics for dataset")?; + + let reviewed_stats = client + .get_dataset_statistics( + &dataset.full_name(), + &StatisticsRequestParams { + comment_filter: CommentFilter { + reviewed:Some(reinfer_client::resources::comment::ReviewedFilterEnum::OnlyReviewed), + ..Default::default() + }, + ..Default::default() + }, + ) + .context("Could not get statistics for dataset")?; + + let dataset_and_stats = DatasetAndStats { + dataset: dataset.clone(), + stats: DatasetStats { + num_reviewed: reviewed_stats.num_comments, + total_verbatims: unfiltered_stats.num_comments + } + }; + dataset_stats.push(dataset_and_stats); + Ok(()) + })?; + printer.print_resources(&dataset_stats) + } else { + printer.print_resources(&datasets) + } } diff --git a/cli/src/printer.rs b/cli/src/printer.rs index 375f49e5..5a8270c9 100644 --- a/cli/src/printer.rs +++ b/cli/src/printer.rs @@ -2,7 +2,8 @@ use super::thousands::Thousands; use colored::Colorize; use prettytable::{format, row, Row, Table}; use reinfer_client::{ - resources::quota::Quota, Bucket, Dataset, Project, Source, Statistics, Stream, User, + resources::{dataset::DatasetAndStats, quota::Quota}, + Bucket, Dataset, Project, Source, Statistics, Stream, User, }; use serde::{Serialize, Serializer}; @@ -98,7 +99,30 @@ impl DisplayTable for Dataset { full_name, self.id.0, self.updated_at.format("%Y-%m-%d %H:%M:%S"), - self.title + self.title, + ] + } +} + +impl DisplayTable for DatasetAndStats { + fn to_table_headers() -> Row { + row![bFg => "Name", "ID", "Updated (UTC)", "Title","Total Verbatims", "Num Reviewed"] + } + + fn to_table_row(&self) -> Row { + let full_name = format!( + "{}{}{}", + self.dataset.owner.0.dimmed(), + "/".dimmed(), + self.dataset.name.0 + ); + row![ + full_name, + self.dataset.id.0, + self.dataset.updated_at.format("%Y-%m-%d %H:%M:%S"), + self.dataset.title, + self.stats.total_verbatims, + self.stats.num_reviewed ] } } diff --git a/cli/tests/common.rs b/cli/tests/common.rs index d495c560..68f3a4f2 100644 --- a/cli/tests/common.rs +++ b/cli/tests/common.rs @@ -1,3 +1,4 @@ +use anyhow::{anyhow, Result}; use once_cell::sync::Lazy; use reinfer_client::User; use std::{ @@ -76,6 +77,14 @@ impl TestCli { self.output_error(self.command().args(args)) } + #[track_caller] + pub fn run_and_result( + &self, + args: impl IntoIterator>, + ) -> Result { + self.output_result(self.command().args(args)) + } + #[track_caller] pub fn run_with_stdin( &self, @@ -117,6 +126,20 @@ impl TestCli { String::from_utf8(output.stdout).unwrap() } + #[track_caller] + pub fn output_result(&self, command: &mut Command) -> Result { + let output = command.output().unwrap(); + + if output.status.success() { + Ok(String::from_utf8(output.stdout)?) + } else { + Err(anyhow!( + "failed to run command:\n{}", + String::from_utf8_lossy(&output.stderr) + )) + } + } + #[track_caller] pub fn output_error(&self, command: &mut Command) -> String { let output = command.output().unwrap(); diff --git a/cli/tests/test_datasets.rs b/cli/tests/test_datasets.rs index 50283b65..54e9ceb4 100644 --- a/cli/tests/test_datasets.rs +++ b/cli/tests/test_datasets.rs @@ -1,3 +1,4 @@ +use backoff::{retry, ExponentialBackoff}; use pretty_assertions::assert_eq; use reinfer_client::{ Dataset, EntityDef, EntityName, LabelDef, LabelDefPretrained, LabelDefPretrainedId, LabelGroup, @@ -59,8 +60,13 @@ impl TestDataset { impl Drop for TestDataset { fn drop(&mut self) { - let output = TestCli::get().run(["delete", "dataset", self.identifier()]); - assert!(output.is_empty()); + let delete_dataset_command = || { + TestCli::get() + .run_and_result(["delete", "dataset", self.identifier()]) + .map_err(backoff::Error::transient) + }; + + retry(ExponentialBackoff::default(), delete_dataset_command).unwrap(); } }