Skip to content

Commit

Permalink
commands: add get dataset stats (#231)
Browse files Browse the repository at this point in the history
* commands: add get dataset stats
  • Loading branch information
joe-prosser authored Oct 14, 2023
1 parent a69fc08 commit 2f96ce7
Show file tree
Hide file tree
Showing 6 changed files with 125 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## Unreleased

- Add ability to get dataset stats
- Show Global Permissions in `get users`
- Upgrade `ordered-float` version, which is exposed in the public crate API.

Expand Down
13 changes: 13 additions & 0 deletions api/src/resources/dataset.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use chrono::{DateTime, Utc};
use ordered_float::NotNan;
use serde::{Deserialize, Serialize};

use crate::{
Expand Down Expand Up @@ -36,6 +37,18 @@ pub struct Dataset {
pub label_groups: Vec<LabelGroup>,
}

/// Summary counts reported for a single dataset.
///
/// In the CLI's `get datasets --stats` path both values are copied from the
/// `num_comments` field of dataset statistics responses.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct DatasetStats {
/// Count taken from a statistics request filtered to reviewed comments only.
pub num_reviewed: NotNan<f64>,
/// Count taken from a statistics request with no comment filter applied.
pub total_verbatims: NotNan<f64>,
}

/// A [`Dataset`] bundled with the [`DatasetStats`] gathered for it, so both can be
/// serialized or printed as one resource.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
pub struct DatasetAndStats {
/// The dataset the statistics belong to.
pub dataset: Dataset,
/// The statistics collected for `dataset`.
pub stats: DatasetStats,
}

impl Dataset {
pub fn full_name(&self) -> FullName {
FullName(format!("{}/{}", self.owner.0, self.name.0))
Expand Down
57 changes: 54 additions & 3 deletions cli/src/commands/get/datasets.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
use anyhow::{Context, Result};
use reinfer_client::{Client, DatasetIdentifier};
use log::info;
use reinfer_client::{
resources::dataset::{DatasetAndStats, DatasetStats, StatisticsRequestParams},
Client, CommentFilter, DatasetIdentifier,
};
use structopt::StructOpt;

use crate::printer::Printer;
Expand All @@ -9,10 +13,17 @@ pub struct GetDatasetsArgs {
#[structopt(name = "dataset")]
/// If specified, only list this dataset (name or id)
dataset: Option<DatasetIdentifier>,

#[structopt(long = "stats")]
/// Whether to include dataset statistics in response
include_stats: bool,
}

pub fn get(client: &Client, args: &GetDatasetsArgs, printer: &Printer) -> Result<()> {
let GetDatasetsArgs { dataset } = args;
let GetDatasetsArgs {
dataset,
include_stats,
} = args;
let datasets = if let Some(dataset) = dataset {
vec![client
.get_dataset(dataset.clone())
Expand All @@ -26,5 +37,45 @@ pub fn get(client: &Client, args: &GetDatasetsArgs, printer: &Printer) -> Result
});
datasets
};
printer.print_resources(&datasets)

let mut dataset_stats = Vec::new();
if *include_stats {
datasets.iter().try_for_each(|dataset| -> Result<()> {
info!("Getting statistics for dataset {}", dataset.full_name().0);
let unfiltered_stats = client
.get_dataset_statistics(
&dataset.full_name(),
&StatisticsRequestParams {
..Default::default()
},
)
.context("Could not get statistics for dataset")?;

let reviewed_stats = client
.get_dataset_statistics(
&dataset.full_name(),
&StatisticsRequestParams {
comment_filter: CommentFilter {
reviewed:Some(reinfer_client::resources::comment::ReviewedFilterEnum::OnlyReviewed),
..Default::default()
},
..Default::default()
},
)
.context("Could not get statistics for dataset")?;

let dataset_and_stats = DatasetAndStats {
dataset: dataset.clone(),
stats: DatasetStats {
num_reviewed: reviewed_stats.num_comments,
total_verbatims: unfiltered_stats.num_comments
}
};
dataset_stats.push(dataset_and_stats);
Ok(())
})?;
printer.print_resources(&dataset_stats)
} else {
printer.print_resources(&datasets)
}
}
28 changes: 26 additions & 2 deletions cli/src/printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ use super::thousands::Thousands;
use colored::Colorize;
use prettytable::{format, row, Row, Table};
use reinfer_client::{
resources::quota::Quota, Bucket, Dataset, Project, Source, Statistics, Stream, User,
resources::{dataset::DatasetAndStats, quota::Quota},
Bucket, Dataset, Project, Source, Statistics, Stream, User,
};
use serde::{Serialize, Serializer};

Expand Down Expand Up @@ -98,7 +99,30 @@ impl DisplayTable for Dataset {
full_name,
self.id.0,
self.updated_at.format("%Y-%m-%d %H:%M:%S"),
self.title
self.title,
]
}
}

impl DisplayTable for DatasetAndStats {
    /// Header row: the four dataset columns followed by the two statistics columns.
    fn to_table_headers() -> Row {
        row![bFg => "Name", "ID", "Updated (UTC)", "Title", "Total Verbatims", "Num Reviewed"]
    }

    /// Renders one dataset and its statistics as a table row, in header order.
    fn to_table_row(&self) -> Row {
        let Self { dataset, stats } = self;

        // Owner and separator are dimmed so the dataset name itself stands out.
        let owner = dataset.owner.0.dimmed();
        let separator = "/".dimmed();
        let full_name = format!("{}{}{}", owner, separator, dataset.name.0);

        let updated_at = dataset.updated_at.format("%Y-%m-%d %H:%M:%S");

        row![
            full_name,
            dataset.id.0,
            updated_at,
            dataset.title,
            stats.total_verbatims,
            stats.num_reviewed
        ]
    }
}
Expand Down
23 changes: 23 additions & 0 deletions cli/tests/common.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use anyhow::{anyhow, Result};
use once_cell::sync::Lazy;
use reinfer_client::User;
use std::{
Expand Down Expand Up @@ -76,6 +77,14 @@ impl TestCli {
self.output_error(self.command().args(args))
}

/// Builds a command via `self.command()`, appends `args`, and hands it to
/// `output_result`, returning whatever that returns.
#[track_caller]
pub fn run_and_result(
    &self,
    args: impl IntoIterator<Item = impl AsRef<OsStr>>,
) -> Result<String> {
    let mut base_command = self.command();
    let command_with_args = base_command.args(args);
    self.output_result(command_with_args)
}

#[track_caller]
pub fn run_with_stdin(
&self,
Expand Down Expand Up @@ -117,6 +126,20 @@ impl TestCli {
String::from_utf8(output.stdout).unwrap()
}

/// Runs `command` to completion and converts its outcome into a `Result`.
///
/// Returns the captured stdout as a `String` when the process exits successfully.
///
/// # Errors
///
/// Returns an error carrying the (lossily decoded) stderr when the exit status is not
/// a success, or a UTF-8 conversion error when stdout is not valid UTF-8.
///
/// # Panics
///
/// Panics if the process output cannot be obtained at all.
#[track_caller]
pub fn output_result(&self, command: &mut Command) -> Result<String> {
    let finished = command.output().unwrap();

    // Guard clause: surface stderr for any unsuccessful exit.
    if !finished.status.success() {
        let stderr_text = String::from_utf8_lossy(&finished.stderr);
        return Err(anyhow!("failed to run command:\n{}", stderr_text));
    }

    let stdout_text = String::from_utf8(finished.stdout)?;
    Ok(stdout_text)
}

#[track_caller]
pub fn output_error(&self, command: &mut Command) -> String {
let output = command.output().unwrap();
Expand Down
10 changes: 8 additions & 2 deletions cli/tests/test_datasets.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use backoff::{retry, ExponentialBackoff};
use pretty_assertions::assert_eq;
use reinfer_client::{
Dataset, EntityDef, EntityName, LabelDef, LabelDefPretrained, LabelDefPretrainedId, LabelGroup,
Expand Down Expand Up @@ -59,8 +60,13 @@ impl TestDataset {

impl Drop for TestDataset {
fn drop(&mut self) {
let output = TestCli::get().run(["delete", "dataset", self.identifier()]);
assert!(output.is_empty());
let delete_dataset_command = || {
TestCli::get()
.run_and_result(["delete", "dataset", self.identifier()])
.map_err(backoff::Error::transient)
};

retry(ExponentialBackoff::default(), delete_dataset_command).unwrap();
}
}

Expand Down

0 comments on commit 2f96ce7

Please sign in to comment.