Skip to content

Commit

Permalink
feat(commands): Add convenient names for read-data-subset n/m (#328)
Browse files Browse the repository at this point in the history
This PR adds convenience names for the `--read-data-subset=n/m` option,
which make it easier to set up regular check runs that cover the whole
repository data over a given period.

E.g. it allows running `check --read-data
--read-data-subset=daily/month` every day, which covers the whole repository
once it has been run on every day of a month. Before, this requirement needed
scripting to set `n` to the number of the current day within the month and `m`
to the total count of days in the current month.

For `n`, it is now allowed to use
- `hourly`
- `daily`
- `weekly`
- `monthly`
- `yearly`
which is replaced by an index corresponding to the date/time the `check`
command is called.

For `m`, it is now allowed to use
- `day`
- `week`
- `month`
- `year`
which, depending on `n`, is replaced by the total count of
hours/days/weeks/months within the current entity. In addition,
`month_hours`, `month_days`, `year_hours` and `year_days` can be used; they
work independently of `n`.

Co-authored-by: simonsan <[email protected]>
  • Loading branch information
aawsome and simonsan authored Oct 14, 2024
1 parent c9c4285 commit 6e989b0
Show file tree
Hide file tree
Showing 19 changed files with 182 additions and 4 deletions.
96 changes: 92 additions & 4 deletions crates/core/src/commands/check.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
//! `check` subcommand
use std::{
collections::{BTreeSet, HashMap},
fmt::Debug,
str::FromStr,
};

use bytes::Bytes;
use bytesize::ByteSize;
use chrono::{Datelike, Local, NaiveDateTime, Timelike};
use derive_setters::Setters;
use log::{debug, error, warn};
use rand::{prelude::SliceRandom, thread_rng, Rng};
Expand Down Expand Up @@ -95,6 +97,47 @@ impl ReadSubsetOption {
}
}

/// Parses the `n/m` form of a read-data subset, including named settings
/// which depend on the given date/time.
///
/// # Arguments
///
/// * `now` - The date/time used to resolve named settings.
/// * `n_in` - Either a plain number or one of `hourly`, `daily`, `weekly`,
///   `monthly`, which is replaced by the zero-based index of the current
///   hour/day within the year, ISO week, or month taken from `now`.
/// * `m_in` - Either a plain non-zero number or one of `day`, `week`, `month`,
///   `year` (interpreted relative to a named `n_in`), or one of `month_hours`,
///   `month_days`, `year_hours`, `year_days` (independent of `n_in`).
///
/// # Errors
///
/// Returns the error converted from the integer parse failure if `n_in` or
/// `m_in` is neither a recognized name (or name combination) nor a valid
/// number, or if a numeric `m_in` is zero.
///
/// # Returns
///
/// The tuple `(n % m, m)`, i.e. the index is always reduced into `0..m`.
fn parse_n_m(now: NaiveDateTime, n_in: &str, m_in: &str) -> Result<(u32, u32), CommandErrorKind> {
    let is_leap_year = |dt: NaiveDateTime| {
        let year = dt.year();
        // Once `year % 4 == 0` holds, `year % 25 != 0` means "not divisible by 100"
        // and `year % 16 == 0` (for multiples of 25) means "divisible by 400".
        year % 4 == 0 && (year % 25 != 0 || year % 16 == 0)
    };

    let days_of_month = |dt: NaiveDateTime| match dt.month() {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if is_leap_year(dt) => 29,
        2 => 28,
        _ => unreachable!("invalid month, should not happen"),
    };

    let days_of_year = |dt: NaiveDateTime| if is_leap_year(dt) { 366 } else { 365 };

    let n = match n_in {
        "hourly" => now.ordinal0() * 24 + now.hour(),
        "daily" => now.ordinal0(),
        "weekly" => now.iso_week().week0(),
        "monthly" => now.month0(),
        n => n.parse()?,
    };

    let m = match (n_in, m_in) {
        ("hourly", "day") => 24,
        ("hourly", "week") => 24 * 7,
        ("hourly", "month") | (_, "month_hours") => 24 * days_of_month(now),
        ("hourly", "year") | (_, "year_hours") => 24 * days_of_year(now),
        ("daily", "week") => 7,
        ("daily", "month") | (_, "month_days") => days_of_month(now),
        ("daily", "year") | (_, "year_days") => days_of_year(now),
        ("weekly", "month") => 4,
        ("weekly", "year") => 52,
        ("monthly", "year") => 12,
        // Parse as a non-zero integer: a user-supplied `m` of 0 must be reported as a
        // parse error instead of panicking in the `n % m` below.
        (_, m) => m.parse::<std::num::NonZeroU32>()?.get(),
    };

    // All named values of `m` are positive and a numeric `m` is checked above,
    // so the remainder cannot divide by zero.
    Ok((n % m, m))
}

impl FromStr for ReadSubsetOption {
type Err = CommandErrorKind;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Expand All @@ -104,8 +147,8 @@ impl FromStr for ReadSubsetOption {
// try to read percentage
Self::Percentage(p.parse()?)
} else if let Some((n, m)) = s.split_once('/') {
// try to read n/m
Self::IdSubSet((n.parse()?, m.parse()?))
let now = Local::now().naive_local();
Self::IdSubSet(parse_n_m(now, n, m)?)
} else {
Self::Size(
ByteSize::from_str(s)
Expand All @@ -131,8 +174,11 @@ pub struct CheckOptions {
#[cfg_attr(feature = "clap", clap(long))]
pub read_data: bool,

/// Read and check pack files
#[cfg_attr(feature = "clap", clap(long, default_value = "all"))]
/// Read only a subset of the data. Allowed values: "all", "n/m" for specific part, "x%" or a size for a random subset.
#[cfg_attr(
feature = "clap",
clap(long, default_value = "all", requires = "read_data")
)]
pub read_data_subset: ReadSubsetOption,
}

Expand Down Expand Up @@ -221,6 +267,7 @@ pub(crate) fn check_repository<P: ProgressBars, S: Open>(
.into_iter()
.filter(|p| packs.contains(&p.id));

debug!("using read-data-subset {:?}", opts.read_data_subset);
let packs = opts.read_data_subset.apply(packs);

repo.warm_up_wait(packs.iter().map(|pack| pack.id))?;
Expand Down Expand Up @@ -721,6 +768,47 @@ mod tests {
assert_ron_snapshot!(s, ids);
}

/// Snapshot test for `parse_n_m`: every `n`/`m` combination is evaluated at a
/// reference date/time, at offsets of one hour, day, week, month and year from
/// it, and at a date in February of a leap year.
#[rstest]
#[case("5", "12")]
#[case("29", "28")]
#[case("15", "month_hours")]
#[case("4", "month_days")]
#[case("hourly", "day")]
#[case("hourly", "week")]
#[case("hourly", "month")]
#[case("hourly", "year")]
#[case("hourly", "20")]
#[case("daily", "week")]
#[case("daily", "month")]
#[case("daily", "year")]
#[case("daily", "15")]
#[case("weekly", "month")]
#[case("weekly", "year")]
#[case("weekly", "10")]
#[case("monthly", "year")]
#[case("monthly", "5")]
fn test_parse_n_m(#[case] n: &str, #[case] m: &str) {
    // Evaluates `parse_n_m` for the current case at the given timestamp.
    let at = |timestamp: &str| {
        let now = timestamp.parse::<NaiveDateTime>().unwrap();
        parse_n_m(now, n, m).unwrap()
    };

    // Reference point, followed by +1 hour, +1 day, +1 week, +1 month, +1 year.
    let res = at("2024-10-11T12:00:00");
    let res_1h = at("2024-10-11T13:00:00");
    let res_1d = at("2024-10-12T12:00:00");
    let res_1w = at("2024-10-18T12:00:00");
    let res_1m = at("2024-11-11T12:00:00");
    let res_1y = at("2025-10-11T12:00:00");
    // A date in February of a leap year.
    let res2 = at("2020-02-02T12:00:00");

    assert_ron_snapshot!(
        format!("n_m_{n}_{m}"),
        (res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)
    );
}

fn test_read_subset_n_m() {
let test_packs = test_packs(&mut thread_rng());
let mut all_packs: BTreeSet<_> = test_packs.iter().map(|pack| pack.id).collect();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((15, 744), (15, 744), (15, 744), (15, 744), (15, 720), (15, 744), (15, 696))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((1, 28), (1, 28), (1, 28), (1, 28), (1, 28), (1, 28), (1, 28))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((4, 31), (4, 31), (4, 31), (4, 31), (4, 30), (4, 31), (4, 29))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((5, 12), (5, 12), (5, 12), (5, 12), (5, 12), (5, 12), (5, 12))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((14, 15), (14, 15), (0, 15), (6, 15), (0, 15), (13, 15), (2, 15))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((5, 31), (5, 31), (6, 31), (12, 31), (15, 30), (4, 31), (3, 29))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((4, 7), (4, 7), (5, 7), (4, 7), (0, 7), (3, 7), (4, 7))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((284, 366), (284, 366), (285, 366), (291, 366), (315, 366), (283, 365), (32, 366))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((8, 20), (9, 20), (12, 20), (16, 20), (12, 20), (4, 20), (0, 20))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((12, 24), (13, 24), (12, 24), (12, 24), (12, 24), (12, 24), (12, 24))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((132, 744), (133, 744), (156, 744), (300, 744), (372, 720), (108, 744), (84, 696))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((108, 168), (109, 168), (132, 168), (108, 168), (12, 168), (84, 168), (108, 168))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((6828, 8784), (6829, 8784), (6852, 8784), (6996, 8784), (7572, 8784), (6804, 8760), (780, 8784))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((4, 5), (4, 5), (4, 5), (4, 5), (0, 5), (4, 5), (1, 5))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((9, 12), (9, 12), (9, 12), (9, 12), (10, 12), (9, 12), (1, 12))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((0, 10), (0, 10), (0, 10), (1, 10), (5, 10), (0, 10), (4, 10))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((0, 4), (0, 4), (0, 4), (1, 4), (1, 4), (0, 4), (0, 4))
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/core/src/commands/check.rs
expression: "(res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)"
---
((40, 52), (40, 52), (40, 52), (41, 52), (45, 52), (40, 52), (4, 52))

0 comments on commit 6e989b0

Please sign in to comment.