Skip to content

Commit

Permalink
Generalize a Requirements struct from Minimums
Browse files Browse the repository at this point in the history
  • Loading branch information
havenwood committed Jun 17, 2024
1 parent e30fc84 commit 9fe3579
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 59 deletions.
26 changes: 8 additions & 18 deletions benches/bench.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
use once_cell::sync::Lazy;
use std::io::Cursor;
use word_tally::{Case, Minimums, Sort, WordTally};
use word_tally::{Case, Chars, Count, Requirements, Sort, WordTally};

const BASE_INPUT: &str = "Orchids bloom silently\nMicrocontrollers hum\nPhalaenopsis thrives\n\
Data packets route\nPhalaenopsis BLOOM\nDendrobium anchors\nPhotosynthesis proceeds\n\
Expand All @@ -20,7 +20,7 @@ fn bench_new_unsorted(c: &mut Criterion) {
c.bench_function("new_unsorted", |b| {
b.iter_batched(
prepare_input,
|input| WordTally::new(input, Case::Lower, Sort::Unsorted, Minimums::default()),
|input| WordTally::new(input, Case::Lower, Sort::Unsorted, Requirements::default()),
BatchSize::SmallInput,
);
});
Expand All @@ -30,7 +30,7 @@ fn bench_new_sorted(c: &mut Criterion) {
c.bench_function("new_sorted", |b| {
b.iter_batched(
prepare_input,
|input| WordTally::new(input, Case::Lower, Sort::Asc, Minimums::default()),
|input| WordTally::new(input, Case::Lower, Sort::Asc, Requirements::default()),
BatchSize::SmallInput,
);
});
Expand All @@ -45,9 +45,9 @@ fn bench_new_min_chars(c: &mut Criterion) {
input,
Case::Lower,
Sort::Unsorted,
Minimums {
chars: 5,
..Minimums::default()
Requirements {
chars: Chars::min(5),
count: Count::default(),
},
)
},
Expand All @@ -60,17 +60,7 @@ fn bench_new_min_count(c: &mut Criterion) {
c.bench_function("new_min_count", |b| {
b.iter_batched(
prepare_input,
|input| {
WordTally::new(
input,
Case::Lower,
Sort::Unsorted,
Minimums {
count: 2,
..Minimums::default()
},
)
},
|input| WordTally::new(input, Case::Lower, Sort::Unsorted, Requirements::default()),
BatchSize::SmallInput,
);
});
Expand All @@ -84,7 +74,7 @@ fn bench_sort(c: &mut Criterion) {
prepare_input(),
Case::Lower,
Sort::Unsorted,
Minimums::default(),
Requirements::default(),
)
},
|mut tally| tally.sort(Sort::Asc),
Expand Down
4 changes: 2 additions & 2 deletions src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ pub struct Args {
#[arg(short, long, default_value_t, value_enum, value_name = "FORMAT")]
pub case: Case,

/// Exclude words that contain fewer than min chars.
/// Exclude words containing fewer than min chars.
#[arg(short, long, default_value_t = 1, value_name = "COUNT")]
pub min_chars: usize,

/// Exclude words that appear fewer than min times.
/// Exclude words appearing fewer than min times.
#[arg(short = 'M', long, default_value_t = 1, value_name = "COUNT")]
pub min_count: u64,

Expand Down
56 changes: 41 additions & 15 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
//! # Examples
//!
//! ```
//! use word_tally::{Case, Minimums, Sort, WordTally};
//! use word_tally::{Case, Requirements, Sort, WordTally};
//!
//! let input = "Cinquedea".as_bytes();
//! let words = WordTally::new(input, Case::Lower, Sort::Desc, Minimums::default());
//! let words = WordTally::new(input, Case::Lower, Sort::Desc, Requirements::default());
//! let expected_tally = vec![("cinquedea".to_string(), 1)];
//!
//! assert_eq!(words.tally(), expected_tally);
Expand Down Expand Up @@ -106,22 +106,48 @@ impl fmt::Display for Sort {
}
}

/// Minimum requirements for a word to be included in the tally.
#[derive(Clone, Copy, Debug, Default)]
pub struct Minimums {
/// Min number of chars for words to be included.
pub chars: usize,
/// Min count of a word for it to be included.
pub count: u64,
/// Requirements a word must satisfy to be included in the tally.
///
/// Each field carries a lower bound. The derived `Default` zeroes both bounds, and
/// both `WordTally::new` and `tally_map` skip filtering for a bound of `1` or less,
/// so `Requirements::default()` excludes nothing.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, PartialOrd, Ord)]
pub struct Requirements {
/// Word length requirements, checked per word while the input is being tallied.
pub chars: Chars,
/// Word count requirements, applied to the tally map once all words are counted.
pub count: Count,
}

/// Word chars requirements for tallying.
///
/// The derived `Default` sets `min` to `0`. `tally_map` skips the length check whenever
/// `min` is `1` or less, so the default imposes no restriction.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, PartialOrd, Ord)]
pub struct Chars {
    /// Min number of chars in a word for it to be tallied.
    ///
    /// NOTE(review): `tally_map` compares this against `str::len()`, which counts UTF-8
    /// bytes rather than `char`s — confirm that is intended for non-ASCII words.
    pub min: usize,
}

impl Chars {
    /// Constructs `Chars` requirements with `size` as the minimum word length.
    ///
    /// This is a `const fn`, so it can also initialize constants and statics.
    /// Called as `Chars::min(size)`, this inherent function is selected over the
    /// two-argument `Ord::min` that the derive brings in.
    #[must_use]
    pub const fn min(size: usize) -> Self {
        Self { min: size }
    }
}

impl From<usize> for Chars {
    /// Converts a raw minimum word length into `Chars` requirements.
    fn from(size: usize) -> Self {
        Self::min(size)
    }
}

/// Word count requirements for tallying.
///
/// The derived `Default` sets `min` to `0`. `WordTally::new` only prunes the tally when
/// `min` is greater than `1`, so the default imposes no restriction.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, PartialOrd, Ord)]
pub struct Count {
    /// Min number of times a word must occur for it to be tallied.
    pub min: u64,
}

impl Count {
    /// Constructs `Count` requirements with `size` as the minimum number of occurrences.
    ///
    /// This is a `const fn`, so it can also initialize constants and statics.
    /// Called as `Count::min(size)`, this inherent function is selected over the
    /// two-argument `Ord::min` that the derive brings in.
    #[must_use]
    pub const fn min(size: u64) -> Self {
        Self { min: size }
    }
}

impl From<u64> for Count {
    /// Converts a raw minimum occurrence count into `Count` requirements.
    fn from(size: u64) -> Self {
        Self::min(size)
    }
}

/// `WordTally` fields are eagerly populated upon construction and exposed by getter methods.
impl WordTally {
/// Constructs a new `WordTally` from a source that implements `Read`, such as a file or stdin.
pub fn new<T: Read>(input: T, case: Case, order: Sort, min: Minimums) -> Self {
let mut tally_map = Self::tally_map(input, case, min.chars);
if min.count > 1 {
tally_map.retain(|_, &mut count| count >= min.count);
pub fn new<T: Read>(input: T, case: Case, order: Sort, requirements: Requirements) -> Self {
let mut tally_map = Self::tally_map(input, case, requirements.chars);
if requirements.count.min > 1 {
tally_map.retain(|_, &mut count| count >= requirements.count.min);
}
let count = tally_map.values().sum();
let tally = Vec::from_iter(tally_map);
Expand Down Expand Up @@ -177,13 +203,13 @@ impl WordTally {
}

/// Creates a tally of optionally normalized words from input that implements `Read`.
fn tally_map<T: Read>(input: T, case: Case, min_chars: usize) -> HashMap<String, u64> {
fn tally_map<T: Read>(input: T, case: Case, chars: Chars) -> HashMap<String, u64> {
let mut tally = HashMap::new();
let lines = BufReader::new(input).lines();

for line in lines.map_while(Result::ok) {
line.unicode_words()
.filter(|unicode_word| min_chars <= 1 || unicode_word.len() >= min_chars)
.filter(|unicode_word| chars.min <= 1 || unicode_word.len() >= chars.min)
.for_each(|unicode_word| {
let word = match case {
Case::Lower => unicode_word.to_lowercase(),
Expand Down
10 changes: 5 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use clap::Parser;
use std::fs::File;
use std::io::{self, ErrorKind::BrokenPipe, LineWriter, Write};
use unescaper::unescape;
use word_tally::{Minimums, WordTally};
use word_tally::{Chars, Count, Requirements, WordTally};

/// `Writer` is a boxed type for dynamic dispatch of the `Write` trait.
type Writer = Box<dyn Write>;
Expand All @@ -19,11 +19,11 @@ fn main() -> Result<()> {
.input
.into_reader()
.with_context(|| format!("Failed to read {:#?}.", args.input.source))?;
let minimums = Minimums {
chars: args.min_chars,
count: args.min_count,
let requirements = Requirements {
chars: Chars::min(args.min_chars),
count: Count::min(args.min_count),
};
let word_tally = WordTally::new(reader, args.case, args.sort, minimums);
let word_tally = WordTally::new(reader, args.case, args.sort, requirements);
let delimiter = unescape(&args.delimiter)?;

if args.verbose || args.debug {
Expand Down
49 changes: 30 additions & 19 deletions tests/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::fs::File;
use std::hash::{DefaultHasher, Hash, Hasher};
use word_tally::{Case, Minimums, Sort, WordTally};
use word_tally::{Case, Chars, Count, Requirements, Sort, WordTally};

const TEST_WORDS_PATH: &str = "tests/files/words.txt";

Expand All @@ -11,15 +11,20 @@ struct ExpectedFields<'a> {
tally: Vec<(&'a str, u64)>,
}

fn word_tally(case: Case, sort: Sort, minimums: Minimums) -> WordTally {
fn word_tally(case: Case, sort: Sort, requirements: Requirements) -> WordTally {
let input = File::open(TEST_WORDS_PATH)
.expect("Expected test words file (`files/words.txt`) to be readable.");

WordTally::new(input, case, sort, minimums)
WordTally::new(input, case, sort, requirements)
}

fn word_tally_test(case: Case, sort: Sort, minimums: Minimums, fields: &ExpectedFields<'_>) {
let word_tally = word_tally(case, sort, minimums);
fn word_tally_test(
case: Case,
sort: Sort,
requirements: Requirements,
fields: &ExpectedFields<'_>,
) {
let word_tally = word_tally(case, sort, requirements);
assert_eq!(word_tally.count(), fields.count);
assert_eq!(word_tally.uniq_count(), fields.uniq_count);
assert_eq!(word_tally.avg(), fields.avg);
Expand All @@ -37,7 +42,7 @@ fn lower_case_desc_order() {
word_tally_test(
Case::Lower,
Sort::Desc,
Minimums::default(),
Requirements::default(),
&ExpectedFields {
count: 45,
uniq_count: 5,
Expand All @@ -52,7 +57,10 @@ fn min_char_count_at_max() {
word_tally_test(
Case::Lower,
Sort::Desc,
Minimums { chars: 3, count: 1 },
Requirements {
chars: Chars::min(3),
count: Count::default(),
},
&ExpectedFields {
count: 9,
uniq_count: 1,
Expand All @@ -67,7 +75,10 @@ fn min_char_count_above_max() {
word_tally_test(
Case::Lower,
Sort::Desc,
Minimums { chars: 4, count: 1 },
Requirements {
chars: Chars::min(4),
count: Count::default(),
},
&ExpectedFields {
count: 0,
uniq_count: 0,
Expand All @@ -82,7 +93,7 @@ fn min_char_count_at_min() {
word_tally_test(
Case::Lower,
Sort::Desc,
Minimums::default(),
Requirements::default(),
&ExpectedFields {
count: 45,
uniq_count: 5,
Expand All @@ -97,9 +108,9 @@ fn min_word_count_at_max() {
word_tally_test(
Case::Lower,
Sort::Desc,
Minimums {
chars: 1,
count: 15,
Requirements {
chars: Chars::default(),
count: Count::min(15),
},
&ExpectedFields {
count: 15,
Expand All @@ -115,7 +126,7 @@ fn upper_case_desc_order() {
word_tally_test(
Case::Upper,
Sort::Desc,
Minimums::default(),
Requirements::default(),
&ExpectedFields {
count: 45,
uniq_count: 5,
Expand All @@ -130,7 +141,7 @@ fn lower_case_asc_order() {
word_tally_test(
Case::Lower,
Sort::Asc,
Minimums::default(),
Requirements::default(),
&ExpectedFields {
count: 45,
uniq_count: 5,
Expand All @@ -145,7 +156,7 @@ fn upper_case_asc_order() {
word_tally_test(
Case::Upper,
Sort::Asc,
Minimums::default(),
Requirements::default(),
&ExpectedFields {
count: 45,
uniq_count: 5,
Expand All @@ -160,7 +171,7 @@ fn original_case_desc_order() {
word_tally_test(
Case::Original,
Sort::Desc,
Minimums::default(),
Requirements::default(),
&ExpectedFields {
count: 45,
uniq_count: 9,
Expand All @@ -185,7 +196,7 @@ fn original_case_asc_order() {
word_tally_test(
Case::Original,
Sort::Asc,
Minimums::default(),
Requirements::default(),
&ExpectedFields {
count: 45,
uniq_count: 9,
Expand Down Expand Up @@ -232,7 +243,7 @@ fn equality_and_hashing() {

let tallies: Vec<WordTally> = cases_and_sorts
.iter()
.map(|&(case, sort)| word_tally(case, sort, Minimums::default()))
.map(|&(case, sort)| word_tally(case, sort, Requirements::default()))
.collect();

for tally in &tallies {
Expand All @@ -252,7 +263,7 @@ fn equality_and_hashing() {

#[test]
fn vec_from() {
let tally = word_tally(Case::Lower, Sort::Desc, Minimums::default());
let tally = word_tally(Case::Lower, Sort::Desc, Requirements::default());

assert_eq!(
Vec::from(tally),
Expand Down

0 comments on commit 9fe3579

Please sign in to comment.