From 28b939598ed506597577b3c53d787929955d84b4 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 4 Feb 2025 08:18:44 -0700 Subject: [PATCH 1/8] fix(core): #574 by increasing the bar for similarity --- harper-core/src/linting/phrase_corrections.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/harper-core/src/linting/phrase_corrections.rs b/harper-core/src/linting/phrase_corrections.rs index 344c06ca..bdb4c39d 100644 --- a/harper-core/src/linting/phrase_corrections.rs +++ b/harper-core/src/linting/phrase_corrections.rs @@ -67,8 +67,8 @@ macro_rules! create_linter_for_phrase { }; } -create_linter_for_phrase!(TurnItOff, "turn it off", 2); -create_linter_for_phrase!(HumanLife, "human life", 2); +create_linter_for_phrase!(TurnItOff, "turn it off", 1); +create_linter_for_phrase!(HumanLife, "human life", 1); create_linter_for_phrase!(ThatChallenged, "that challenged", 2); create_linter_for_phrase!(NoLonger, "no longer", 1); create_linter_for_phrase!(NeedHelp, "need help", 1); @@ -82,6 +82,11 @@ mod tests { use super::{Decision, OfCourse, TurnItOff}; + #[test] + fn issue_574() { + assert_lint_count("run by one", TurnItOff::default(), 0); + } + #[test] fn turn_it_off_clean_lower() { assert_lint_count("turn it off", TurnItOff::default(), 0); From aae7421cebbf6e72814e6eb09af9648b2c4265ef Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 4 Feb 2025 14:44:52 -0700 Subject: [PATCH 2/8] fix(core): remove linter causing #582 --- harper-core/src/linting/lint_group.rs | 3 +-- harper-core/src/linting/mod.rs | 2 +- harper-core/src/linting/phrase_corrections.rs | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index 580daf37..053dbecc 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -17,7 +17,7 @@ use super::merge_words::MergeWords; use super::multiple_sequential_pronouns::MultipleSequentialPronouns; use super::number_suffix_capitalization::NumberSuffixCapitalization; use super::phrase_corrections::{ - AndThis, Decision, HumanLife, NeedHelp, NoLonger, OfCourse, ThatChallenged, TurnItOff, + Decision, HumanLife, NeedHelp, NoLonger, OfCourse, ThatChallenged, TurnItOff, }; use super::plural_conjugate::PluralConjugate; use super::pronoun_contraction::PronounContraction; @@ -198,7 +198,6 @@ create_lint_group_config!( SomewhatSomething => true, LetsConfusion => true, DespiteOf => true, - AndThis => true, Decision => true, HumanLife => true, NeedHelp => true, diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 3cf5e763..369f28ec 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -64,7 +64,7 @@ pub use number_suffix_capitalization::NumberSuffixCapitalization; pub use oxford_comma::OxfordComma; pub use pattern_linter::PatternLinter; pub use phrase_corrections::{ - AndThis, Decision, HumanLife, NeedHelp, NoLonger, OfCourse, ThatChallenged, TurnItOff, + Decision, HumanLife, NeedHelp, NoLonger, OfCourse, ThatChallenged, TurnItOff, }; pub use plural_conjugate::PluralConjugate; pub use pronoun_contraction::PronounContraction; diff --git a/harper-core/src/linting/phrase_corrections.rs b/harper-core/src/linting/phrase_corrections.rs index bdb4c39d..47cc57ca 100644 --- a/harper-core/src/linting/phrase_corrections.rs +++ b/harper-core/src/linting/phrase_corrections.rs @@ -72,7 +72,6 @@ create_linter_for_phrase!(HumanLife, "human life", 1); create_linter_for_phrase!(ThatChallenged, "that challenged", 2); create_linter_for_phrase!(NoLonger, "no longer", 1); create_linter_for_phrase!(NeedHelp, "need help", 1); -create_linter_for_phrase!(AndThis, "and this", 1); create_linter_for_phrase!(Decision, "make a decision", 1); create_linter_for_phrase!(OfCourse, "of course", 1); From 17971486bcca009c10a9894fab9ceb9b17661dca Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 4 Feb 2025 15:18:53 -0700 Subject: [PATCH 3/8] feat(core): add more phrases --- harper-core/src/linting/phrase_corrections.rs | 72 ++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/harper-core/src/linting/phrase_corrections.rs b/harper-core/src/linting/phrase_corrections.rs index 47cc57ca..384eaded 100644 --- a/harper-core/src/linting/phrase_corrections.rs +++ b/harper-core/src/linting/phrase_corrections.rs @@ -75,11 +75,27 @@ create_linter_for_phrase!(NeedHelp, "need help", 1); create_linter_for_phrase!(Decision, "make a decision", 1); create_linter_for_phrase!(OfCourse, "of course", 1); +create_linter_for_phrase!(AndAlike, "and alike", 1); +create_linter_for_phrase!(BadRap, "bad rap", 1); +create_linter_for_phrase!(BatedBreath, "bated breath", 1); +create_linter_for_phrase!(BeckAndCall, "beck and call", 1); +create_linter_for_phrase!(ChangeTack, "change tack", 1); +create_linter_for_phrase!(HungerPang, "hunger pang", 3); +create_linter_for_phrase!(EnMasse, "en masse", 1); +create_linter_for_phrase!(LetAlone, "let alone", 1); +create_linter_for_phrase!(LoAndBehold, "lo and behold", 2); +create_linter_for_phrase!(MootPoint, "moot point", 3); +create_linter_for_phrase!(SneakingSuspicion, "sneaking suspicion", 3); +create_linter_for_phrase!(SupposeTo, "suppose to", 1); + #[cfg(test)] mod tests { use crate::linting::tests::{assert_lint_count, assert_suggestion_result}; - use super::{Decision, OfCourse, TurnItOff}; + use super::{ + BadRap, BatedBreath, ChangeTack, Decision, EnMasse, HungerPang, LetAlone, LoAndBehold, + MootPoint, OfCourse, SneakingSuspicion, SupposeTo, TurnItOff, + }; #[test] fn issue_574() { @@ -132,4 +148,58 @@ mod tests { "Yes, of course we should do that.", ); } + + #[test] + fn bad_rep() { + assert_suggestion_result("bad rep", BadRap::default(), "bad rap"); + } + + #[test] + fn baited_breath() { + assert_suggestion_result("baited breath", BatedBreath::default(), "bated breath"); + } + + #[test] + fn change_tact() { + assert_suggestion_result("change tact", ChangeTack::default(), "change tack"); + } + + #[test] + fn hunger_pain() { + assert_suggestion_result("hunger pain", HungerPang::default(), "hunger pang"); + } + + #[test] + fn in_mass() { + assert_suggestion_result("in mass", EnMasse::default(), "en masse"); + } + + #[test] + fn let_along() { + assert_suggestion_result("let along", LetAlone::default(), "let alone"); + } + + #[test] + fn long_and_behold() { + assert_suggestion_result("long and behold", LoAndBehold::default(), "lo and behold"); + } + + #[test] + fn mute_point() { + assert_suggestion_result("mute point", MootPoint::default(), "moot point"); + } + + #[test] + fn sneaky_suspicion() { + assert_suggestion_result( + "sneaky suspicion", + SneakingSuspicion::default(), + "sneaking suspicion", + ); + } + + #[test] + fn supposed_to() { + assert_suggestion_result("supposed to", SupposeTo::default(), "suppose to"); + } } From 4e95bc6317ec704904cf32816f99246aabee7905 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 4 Feb 2025 15:22:01 -0700 Subject: [PATCH 4/8] fix(core): remove cause of #581 --- harper-core/src/linting/lint_group.rs | 3 +-- harper-core/src/linting/mod.rs | 4 +--- harper-core/src/linting/phrase_corrections.rs | 15 ++------------- 3 files changed, 4 insertions(+), 18 deletions(-) diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index 053dbecc..5ae73d9d 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -17,7 +17,7 @@ use super::merge_words::MergeWords; use super::multiple_sequential_pronouns::MultipleSequentialPronouns; use super::number_suffix_capitalization::NumberSuffixCapitalization; use super::phrase_corrections::{ - Decision, HumanLife, NeedHelp, NoLonger, OfCourse, ThatChallenged, TurnItOff, + HumanLife, NeedHelp, NoLonger, OfCourse, ThatChallenged, TurnItOff, }; use super::plural_conjugate::PluralConjugate; use super::pronoun_contraction::PronounContraction; @@ -198,7 +198,6 @@ create_lint_group_config!( SomewhatSomething => true, LetsConfusion => true, DespiteOf => true, - Decision => true, HumanLife => true, NeedHelp => true, NoLonger => true, diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 369f28ec..90e56c6d 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -63,9 +63,7 @@ pub use multiple_sequential_pronouns::MultipleSequentialPronouns; pub use number_suffix_capitalization::NumberSuffixCapitalization; pub use oxford_comma::OxfordComma; pub use pattern_linter::PatternLinter; -pub use phrase_corrections::{ - Decision, HumanLife, NeedHelp, NoLonger, OfCourse, ThatChallenged, TurnItOff, -}; +pub use phrase_corrections::{HumanLife, NeedHelp, NoLonger, OfCourse, ThatChallenged, TurnItOff}; pub use plural_conjugate::PluralConjugate; pub use pronoun_contraction::PronounContraction; pub use proper_noun_capitalization_linters::{ diff --git a/harper-core/src/linting/phrase_corrections.rs b/harper-core/src/linting/phrase_corrections.rs index 384eaded..0ac7bbcf 100644 --- a/harper-core/src/linting/phrase_corrections.rs +++ b/harper-core/src/linting/phrase_corrections.rs @@ -72,9 +72,7 @@ create_linter_for_phrase!(HumanLife, "human life", 1); create_linter_for_phrase!(ThatChallenged, "that challenged", 2); create_linter_for_phrase!(NoLonger, "no longer", 1); create_linter_for_phrase!(NeedHelp, "need help", 1); -create_linter_for_phrase!(Decision, "make a decision", 1); create_linter_for_phrase!(OfCourse, "of course", 1); - create_linter_for_phrase!(AndAlike, "and alike", 1); create_linter_for_phrase!(BadRap, "bad rap", 1); create_linter_for_phrase!(BatedBreath, "bated breath", 1); @@ -93,8 +91,8 @@ mod tests { use crate::linting::tests::{assert_lint_count, assert_suggestion_result}; use super::{ - BadRap, BatedBreath, ChangeTack, Decision, EnMasse, HungerPang, LetAlone, LoAndBehold, - MootPoint, OfCourse, SneakingSuspicion, SupposeTo, TurnItOff, + BadRap, BatedBreath, ChangeTack, EnMasse, HungerPang, LetAlone, LoAndBehold, MootPoint, + OfCourse, SneakingSuspicion, SupposeTo, TurnItOff, }; #[test] @@ -122,15 +120,6 @@ mod tests { assert_suggestion_result("Turn i of", TurnItOff::default(), "Turn it off"); } - #[test] - fn take_a_decision() { - assert_suggestion_result( - "we should take a decision on this", - Decision::default(), - "we should make a decision on this", - ); - } - #[test] fn off_course() { assert_suggestion_result( From e058a1add8d373ff2b3c56dd19c06a1ca16594f7 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 4 Feb 2025 15:34:52 -0700 Subject: [PATCH 5/8] feat(core): expose new rules --- harper-core/src/linting/lint_group.rs | 18 ++++++++++++++++-- harper-core/src/linting/mod.rs | 6 +++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index 5ae73d9d..db5493ba 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -17,7 +17,9 @@ use super::merge_words::MergeWords; use super::multiple_sequential_pronouns::MultipleSequentialPronouns; use super::number_suffix_capitalization::NumberSuffixCapitalization; use super::phrase_corrections::{ - HumanLife, NeedHelp, NoLonger, OfCourse, ThatChallenged, TurnItOff, + AndAlike, BadRap, BatedBreath, BeckAndCall, ChangeTack, EnMasse, HumanLife, HungerPang, + LetAlone, LoAndBehold, MootPoint, NeedHelp, NoLonger, OfCourse, SneakingSuspicion, SupposeTo, + ThatChallenged, TurnItOff, }; use super::plural_conjugate::PluralConjugate; use super::pronoun_contraction::PronounContraction; @@ -203,7 +205,19 @@ create_lint_group_config!( NoLonger => true, ThatChallenged => true, TurnItOff => true, - OfCourse => true + OfCourse => true, + AndAlike => true, + BadRap => true, + BatedBreath => true, + BeckAndCall => true, + ChangeTack => true, + HungerPang => true, + EnMasse => true, + LetAlone => true, + LoAndBehold => true, + MootPoint => true, + SneakingSuspicion => true, + SupposeTo => true ); impl Default for LintGroup { diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 90e56c6d..8fe7a418 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -63,7 +63,11 @@ pub use multiple_sequential_pronouns::MultipleSequentialPronouns; pub use number_suffix_capitalization::NumberSuffixCapitalization; pub use oxford_comma::OxfordComma; pub use pattern_linter::PatternLinter; -pub use phrase_corrections::{HumanLife, NeedHelp, NoLonger, OfCourse, ThatChallenged, TurnItOff}; +pub use phrase_corrections::{ + AndAlike, BadRap, BatedBreath, BeckAndCall, ChangeTack, EnMasse, HumanLife, HungerPang, + LetAlone, LoAndBehold, MootPoint, NeedHelp, NoLonger, OfCourse, SneakingSuspicion, SupposeTo, + ThatChallenged, TurnItOff, +}; pub use plural_conjugate::PluralConjugate; pub use pronoun_contraction::PronounContraction; pub use proper_noun_capitalization_linters::{ From aeca94dd459cfde869ccc5e39ef0daa1778c8f84 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 4 Feb 2025 15:44:10 -0700 Subject: [PATCH 6/8] fix(core): remove problematic `MootPoint` rule --- harper-core/src/linting/lint_group.rs | 3 +-- harper-core/src/linting/mod.rs | 2 +- harper-core/src/linting/phrase_corrections.rs | 10 ++-------- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs index db5493ba..2f8ac1e9 100644 --- a/harper-core/src/linting/lint_group.rs +++ b/harper-core/src/linting/lint_group.rs @@ -18,7 +18,7 @@ use super::multiple_sequential_pronouns::MultipleSequentialPronouns; use super::number_suffix_capitalization::NumberSuffixCapitalization; use super::phrase_corrections::{ AndAlike, BadRap, BatedBreath, BeckAndCall, ChangeTack, EnMasse, HumanLife, HungerPang, - LetAlone, LoAndBehold, MootPoint, NeedHelp, NoLonger, OfCourse, SneakingSuspicion, SupposeTo, + LetAlone, LoAndBehold, NeedHelp, NoLonger, OfCourse, SneakingSuspicion, SupposeTo, ThatChallenged, TurnItOff, }; use super::plural_conjugate::PluralConjugate; @@ -215,7 +215,6 @@ create_lint_group_config!( EnMasse => true, LetAlone => true, LoAndBehold => true, - MootPoint => true, SneakingSuspicion => true, SupposeTo => true ); diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 8fe7a418..ee2d86f0 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -65,7 +65,7 @@ pub use oxford_comma::OxfordComma; pub use pattern_linter::PatternLinter; pub use phrase_corrections::{ AndAlike, BadRap, BatedBreath, BeckAndCall, ChangeTack, EnMasse, HumanLife, HungerPang, - LetAlone, LoAndBehold, MootPoint, NeedHelp, NoLonger, OfCourse, SneakingSuspicion, SupposeTo, + LetAlone, LoAndBehold, NeedHelp, NoLonger, OfCourse, SneakingSuspicion, SupposeTo, ThatChallenged, TurnItOff, }; pub use plural_conjugate::PluralConjugate; diff --git a/harper-core/src/linting/phrase_corrections.rs b/harper-core/src/linting/phrase_corrections.rs index 0ac7bbcf..095698e0 100644 --- a/harper-core/src/linting/phrase_corrections.rs +++ b/harper-core/src/linting/phrase_corrections.rs @@ -82,7 +82,6 @@ create_linter_for_phrase!(HungerPang, "hunger pang", 3); create_linter_for_phrase!(EnMasse, "en masse", 1); create_linter_for_phrase!(LetAlone, "let alone", 1); create_linter_for_phrase!(LoAndBehold, "lo and behold", 2); -create_linter_for_phrase!(MootPoint, "moot point", 3); create_linter_for_phrase!(SneakingSuspicion, "sneaking suspicion", 3); create_linter_for_phrase!(SupposeTo, "suppose to", 1); @@ -91,8 +90,8 @@ mod tests { use crate::linting::tests::{assert_lint_count, assert_suggestion_result}; use super::{ - BadRap, BatedBreath, ChangeTack, EnMasse, HungerPang, LetAlone, LoAndBehold, MootPoint, - OfCourse, SneakingSuspicion, SupposeTo, TurnItOff, + BadRap, BatedBreath, ChangeTack, EnMasse, HungerPang, LetAlone, LoAndBehold, OfCourse, + SneakingSuspicion, SupposeTo, TurnItOff, }; #[test] @@ -173,11 +172,6 @@ mod tests { assert_suggestion_result("long and behold", LoAndBehold::default(), "lo and behold"); } - #[test] - fn mute_point() { - assert_suggestion_result("mute point", MootPoint::default(), "moot point"); - } - #[test] fn sneaky_suspicion() { assert_suggestion_result( From ce063235619be51331ba01080d22e52b3e8b8012 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 4 Feb 2025 15:46:18 -0700 Subject: [PATCH 7/8] fix(core): Make `HungerPang` looser --- harper-core/src/linting/phrase_corrections.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harper-core/src/linting/phrase_corrections.rs b/harper-core/src/linting/phrase_corrections.rs index 095698e0..69cec4f5 100644 --- a/harper-core/src/linting/phrase_corrections.rs +++ b/harper-core/src/linting/phrase_corrections.rs @@ -78,7 +78,7 @@ create_linter_for_phrase!(BadRap, "bad rap", 1); create_linter_for_phrase!(BatedBreath, "bated breath", 1); create_linter_for_phrase!(BeckAndCall, "beck and call", 1); create_linter_for_phrase!(ChangeTack, "change tack", 1); -create_linter_for_phrase!(HungerPang, "hunger pang", 3); +create_linter_for_phrase!(HungerPang, "hunger pang", 2); create_linter_for_phrase!(EnMasse, "en masse", 1); create_linter_for_phrase!(LetAlone, "let alone", 1); create_linter_for_phrase!(LoAndBehold, "lo and behold", 2); From f7b48c4ce8d2d5221fb356cd9a2d1cd6aa3c712b Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Tue, 4 Feb 2025 18:42:51 -0700 Subject: [PATCH 8/8] refactor(core): move `TokenStringExt` into its own module --- harper-core/src/lib.rs | 4 +- .../src/linting/correct_number_suffix.rs | 2 +- harper-core/src/linting/linking_verbs.rs | 2 +- harper-core/src/linting/long_sentences.rs | 2 +- .../linting/number_suffix_capitalization.rs | 2 +- harper-core/src/linting/repeated_words.rs | 2 +- harper-core/src/linting/spaces.rs | 2 +- harper-core/src/token.rs | 235 +----------------- harper-core/src/token_string_ext.rs | 234 +++++++++++++++++ 9 files changed, 244 insertions(+), 241 deletions(-) create mode 100644 harper-core/src/token_string_ext.rs diff --git a/harper-core/src/lib.rs b/harper-core/src/lib.rs index 462a4c6a..ccf9df6c 100644 --- a/harper-core/src/lib.rs +++ b/harper-core/src/lib.rs @@ -21,6 +21,7 @@ mod sync; mod title_case; mod token; mod token_kind; +mod token_string_ext; mod vec_ext; mod word_metadata; @@ -38,8 +39,9 @@ pub use span::Span; pub use spell::{Dictionary, FstDictionary, FullDictionary, MergedDictionary}; pub use sync::Lrc; pub use title_case::{make_title_case, make_title_case_str}; -pub use token::{Token, TokenStringExt}; +pub use token::Token; pub use token_kind::{NumberSuffix, TokenKind}; +pub use token_string_ext::TokenStringExt; pub use vec_ext::VecExt; pub use word_metadata::{AdverbData, ConjunctionData, NounData, Tense, VerbData, WordMetadata}; diff --git a/harper-core/src/linting/correct_number_suffix.rs b/harper-core/src/linting/correct_number_suffix.rs index 7237b49c..a2fbfb0a 100644 --- a/harper-core/src/linting/correct_number_suffix.rs +++ b/harper-core/src/linting/correct_number_suffix.rs @@ -1,5 +1,5 @@ use super::{Lint, LintKind, Linter, Suggestion}; -use crate::token::TokenStringExt; +use crate::TokenStringExt; use crate::{Document, NumberSuffix, Span, TokenKind}; /// Detect and warn that the sentence is too long. diff --git a/harper-core/src/linting/linking_verbs.rs b/harper-core/src/linting/linking_verbs.rs index 60fea3ca..ad89c4e2 100644 --- a/harper-core/src/linting/linking_verbs.rs +++ b/harper-core/src/linting/linking_verbs.rs @@ -1,6 +1,6 @@ use super::{Lint, LintKind, Linter}; -use crate::token::TokenStringExt; use crate::Document; +use crate::TokenStringExt; /// Detect and warn that the sentence is too long. #[derive(Debug, Clone, Copy, Default)] diff --git a/harper-core/src/linting/long_sentences.rs b/harper-core/src/linting/long_sentences.rs index 666d018e..fac2e8fc 100644 --- a/harper-core/src/linting/long_sentences.rs +++ b/harper-core/src/linting/long_sentences.rs @@ -1,5 +1,5 @@ use super::{Lint, LintKind, Linter}; -use crate::token::TokenStringExt; +use crate::TokenStringExt; use crate::{Document, Span}; /// Detect and warn that the sentence is too long. diff --git a/harper-core/src/linting/number_suffix_capitalization.rs b/harper-core/src/linting/number_suffix_capitalization.rs index 7c46950f..b7763a67 100644 --- a/harper-core/src/linting/number_suffix_capitalization.rs +++ b/harper-core/src/linting/number_suffix_capitalization.rs @@ -1,5 +1,5 @@ use super::{Lint, LintKind, Linter, Suggestion}; -use crate::token::TokenStringExt; +use crate::TokenStringExt; use crate::{Document, Span, TokenKind}; /// Detect and warn that the sentence is too long. diff --git a/harper-core/src/linting/repeated_words.rs b/harper-core/src/linting/repeated_words.rs index 61dd1bc2..1d10f4ab 100644 --- a/harper-core/src/linting/repeated_words.rs +++ b/harper-core/src/linting/repeated_words.rs @@ -1,7 +1,7 @@ use smallvec::smallvec; use super::{Lint, LintKind, Linter, Suggestion}; -use crate::token::TokenStringExt; +use crate::TokenStringExt; use crate::{CharString, CharStringExt, Document, Span}; #[derive(Debug, Clone)] diff --git a/harper-core/src/linting/spaces.rs b/harper-core/src/linting/spaces.rs index f9faa087..28fc5803 100644 --- a/harper-core/src/linting/spaces.rs +++ b/harper-core/src/linting/spaces.rs @@ -1,5 +1,5 @@ use super::{Lint, LintKind, Linter, Suggestion}; -use crate::token::TokenStringExt; +use crate::TokenStringExt; use crate::{Document, Token, TokenKind}; #[derive(Debug, Default)] diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index 83eb246f..7c93f2ce 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -1,8 +1,6 @@ -use itertools::Itertools; -use paste::paste; use serde::{Deserialize, Serialize}; -use crate::{FatToken, Span, TokenKind}; +use crate::{FatToken, Span, TokenKind, TokenStringExt}; #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Default)] pub struct Token { @@ -26,237 +24,6 @@ impl Token { } } -macro_rules! create_decl_for { - ($thing:ident) => { - paste! { - fn [< first_ $thing >](&self) -> Option; - - fn [< last_ $thing >](&self) -> Option; - - fn [< last_ $thing _index >](&self) -> Option; - - fn [](&self) -> impl Iterator + '_; - - fn [](&self) -> impl Iterator + '_; - } - }; -} - -macro_rules! create_fns_for { - ($thing:ident) => { - paste! { - fn [< first_ $thing >](&self) -> Option { - self.iter().find(|v| v.kind.[]()).copied() - } - - fn [< last_ $thing >](&self) -> Option { - self.iter().rev().find(|v| v.kind.[]()).copied() - } - - fn [< last_ $thing _index >](&self) -> Option { - self.iter().rev().position(|v| v.kind.[]()).map(|i| self.len() - i - 1) - } - - fn [](&self) -> impl Iterator + '_ { - self.iter() - .enumerate() - .filter(|(_, t)| t.kind.[]()) - .map(|(i, _)| i) - } - - fn [](&self) -> impl Iterator + '_ { - self.[]().map(|i| self[i]) - } - } - }; -} - -/// Extension methods for [`Token`] sequences that make them easier to wrangle and query. -pub trait TokenStringExt { - fn first_sentence_word(&self) -> Option; - fn first_non_whitespace(&self) -> Option; - /// Grab the span that represents the beginning of the first element and the - /// end of the last element. - fn span(&self) -> Option; - - create_decl_for!(word); - create_decl_for!(word_like); - create_decl_for!(conjunction); - create_decl_for!(space); - create_decl_for!(apostrophe); - create_decl_for!(pipe); - create_decl_for!(quote); - create_decl_for!(number); - create_decl_for!(at); - create_decl_for!(ellipsis); - create_decl_for!(unlintable); - create_decl_for!(sentence_terminator); - create_decl_for!(paragraph_break); - create_decl_for!(chunk_terminator); - create_decl_for!(punctuation); - create_decl_for!(currency); - create_decl_for!(likely_homograph); - - fn iter_linking_verb_indices(&self) -> impl Iterator + '_; - fn iter_linking_verbs(&self) -> impl Iterator + '_; - - /// Iterate over chunks. - /// - /// For example, the following sentence contains two chunks separated by a - /// comma: - /// - /// ```text - /// Here is an example, it is short. - /// ``` - fn iter_chunks(&self) -> impl Iterator + '_; - - /// Get an iterator over token slices that represent the individual - /// paragraphs in a document. - fn iter_paragraphs(&self) -> impl Iterator + '_; - - /// Get an iterator over token slices that represent the individual - /// sentences in a document. - fn iter_sentences(&self) -> impl Iterator + '_; -} - -impl TokenStringExt for [Token] { - create_fns_for!(word); - create_fns_for!(word_like); - create_fns_for!(conjunction); - create_fns_for!(space); - create_fns_for!(apostrophe); - create_fns_for!(pipe); - create_fns_for!(quote); - create_fns_for!(number); - create_fns_for!(at); - create_fns_for!(punctuation); - create_fns_for!(ellipsis); - create_fns_for!(unlintable); - create_fns_for!(sentence_terminator); - create_fns_for!(paragraph_break); - create_fns_for!(chunk_terminator); - create_fns_for!(currency); - create_fns_for!(likely_homograph); - - fn first_non_whitespace(&self) -> Option { - self.iter().find(|t| !t.kind.is_whitespace()).copied() - } - - fn first_sentence_word(&self) -> Option { - let (w_idx, word) = self.iter().find_position(|v| v.kind.is_word())?; - - let Some(u_idx) = self.iter().position(|v| v.kind.is_unlintable()) else { - return Some(*word); - }; - - if w_idx < u_idx { - Some(*word) - } else { - None - } - } - - fn span(&self) -> Option { - let min_max = self - .iter() - .flat_map(|v| [v.span.start, v.span.end].into_iter()) - .minmax(); - - match min_max { - itertools::MinMaxResult::NoElements => None, - itertools::MinMaxResult::OneElement(min) => Some(Span::new(min, min)), - itertools::MinMaxResult::MinMax(min, max) => Some(Span::new(min, max)), - } - } - - fn iter_linking_verb_indices(&self) -> impl Iterator + '_ { - self.iter_word_indices().filter(|idx| { - let word = self[*idx]; - let TokenKind::Word(word) = word.kind else { - panic!("Should be unreachable."); - }; - - word.is_linking_verb() - }) - } - - fn iter_linking_verbs(&self) -> impl Iterator + '_ { - self.iter_linking_verb_indices().map(|idx| self[idx]) - } - - fn iter_chunks(&self) -> impl Iterator + '_ { - let first_chunk = self - .iter_chunk_terminator_indices() - .next() - .map(|first_term| &self[0..=first_term]); - - let rest = self - .iter_chunk_terminator_indices() - .tuple_windows() - .map(move |(a, b)| &self[a + 1..=b]); - - let last = if let Some(last_i) = self.last_chunk_terminator_index() { - if last_i + 1 < self.len() { - Some(&self[last_i + 1..]) - } else { - None - } - } else { - Some(self) - }; - - first_chunk.into_iter().chain(rest).chain(last) - } - - fn iter_paragraphs(&self) -> impl Iterator + '_ { - let first_pg = self - .iter_paragraph_break_indices() - .next() - .map(|first_term| &self[0..=first_term]); - - let rest = self - .iter_paragraph_break_indices() - .tuple_windows() - .map(move |(a, b)| &self[a + 1..=b]); - - let last_pg = if let Some(last_i) = self.last_paragraph_break_index() { - if last_i + 1 < self.len() { - Some(&self[last_i + 1..]) - } else { - None - } - } else { - Some(self) - }; - - first_pg.into_iter().chain(rest).chain(last_pg) - } - - fn iter_sentences(&self) -> impl Iterator + '_ { - let first_sentence = self - .iter_sentence_terminator_indices() - .next() - .map(|first_term| &self[0..=first_term]); - - let rest = self - .iter_sentence_terminator_indices() - .tuple_windows() - .map(move |(a, b)| &self[a + 1..=b]); - - let last_sentence = if let Some(last_i) = self.last_sentence_terminator_index() { - if last_i + 1 < self.len() { - Some(&self[last_i + 1..]) - } else { - None - } - } else { - Some(self) - }; - - first_sentence.into_iter().chain(rest).chain(last_sentence) - } -} - #[cfg(test)] mod tests { use crate::{ diff --git a/harper-core/src/token_string_ext.rs b/harper-core/src/token_string_ext.rs new file mode 100644 index 00000000..d3ceda72 --- /dev/null +++ b/harper-core/src/token_string_ext.rs @@ -0,0 +1,234 @@ +use crate::{Span, Token, TokenKind}; +use itertools::Itertools; +use paste::paste; + +macro_rules! create_decl_for { + ($thing:ident) => { + paste! { + fn [< first_ $thing >](&self) -> Option; + + fn [< last_ $thing >](&self) -> Option; + + fn [< last_ $thing _index >](&self) -> Option; + + fn [](&self) -> impl Iterator + '_; + + fn [](&self) -> impl Iterator + '_; + } + }; +} + +macro_rules! create_fns_for { + ($thing:ident) => { + paste! { + fn [< first_ $thing >](&self) -> Option { + self.iter().find(|v| v.kind.[]()).copied() + } + + fn [< last_ $thing >](&self) -> Option { + self.iter().rev().find(|v| v.kind.[]()).copied() + } + + fn [< last_ $thing _index >](&self) -> Option { + self.iter().rev().position(|v| v.kind.[]()).map(|i| self.len() - i - 1) + } + + fn [](&self) -> impl Iterator + '_ { + self.iter() + .enumerate() + .filter(|(_, t)| t.kind.[]()) + .map(|(i, _)| i) + } + + fn [](&self) -> impl Iterator + '_ { + self.[]().map(|i| self[i]) + } + } + }; +} + +/// Extension methods for [`Token`] sequences that make them easier to wrangle and query. +pub trait TokenStringExt { + fn first_sentence_word(&self) -> Option; + fn first_non_whitespace(&self) -> Option; + /// Grab the span that represents the beginning of the first element and the + /// end of the last element. + fn span(&self) -> Option; + + create_decl_for!(word); + create_decl_for!(word_like); + create_decl_for!(conjunction); + create_decl_for!(space); + create_decl_for!(apostrophe); + create_decl_for!(pipe); + create_decl_for!(quote); + create_decl_for!(number); + create_decl_for!(at); + create_decl_for!(ellipsis); + create_decl_for!(unlintable); + create_decl_for!(sentence_terminator); + create_decl_for!(paragraph_break); + create_decl_for!(chunk_terminator); + create_decl_for!(punctuation); + create_decl_for!(currency); + create_decl_for!(likely_homograph); + + fn iter_linking_verb_indices(&self) -> impl Iterator + '_; + fn iter_linking_verbs(&self) -> impl Iterator + '_; + + /// Iterate over chunks. + /// + /// For example, the following sentence contains two chunks separated by a + /// comma: + /// + /// ```text + /// Here is an example, it is short. + /// ``` + fn iter_chunks(&self) -> impl Iterator + '_; + + /// Get an iterator over token slices that represent the individual + /// paragraphs in a document. + fn iter_paragraphs(&self) -> impl Iterator + '_; + + /// Get an iterator over token slices that represent the individual + /// sentences in a document. + fn iter_sentences(&self) -> impl Iterator + '_; +} + +impl TokenStringExt for [Token] { + create_fns_for!(word); + create_fns_for!(word_like); + create_fns_for!(conjunction); + create_fns_for!(space); + create_fns_for!(apostrophe); + create_fns_for!(pipe); + create_fns_for!(quote); + create_fns_for!(number); + create_fns_for!(at); + create_fns_for!(punctuation); + create_fns_for!(ellipsis); + create_fns_for!(unlintable); + create_fns_for!(sentence_terminator); + create_fns_for!(paragraph_break); + create_fns_for!(chunk_terminator); + create_fns_for!(currency); + create_fns_for!(likely_homograph); + + fn first_non_whitespace(&self) -> Option { + self.iter().find(|t| !t.kind.is_whitespace()).copied() + } + + fn first_sentence_word(&self) -> Option { + let (w_idx, word) = self.iter().find_position(|v| v.kind.is_word())?; + + let Some(u_idx) = self.iter().position(|v| v.kind.is_unlintable()) else { + return Some(*word); + }; + + if w_idx < u_idx { + Some(*word) + } else { + None + } + } + + fn span(&self) -> Option { + let min_max = self + .iter() + .flat_map(|v| [v.span.start, v.span.end].into_iter()) + .minmax(); + + match min_max { + itertools::MinMaxResult::NoElements => None, + itertools::MinMaxResult::OneElement(min) => Some(Span::new(min, min)), + itertools::MinMaxResult::MinMax(min, max) => Some(Span::new(min, max)), + } + } + + fn iter_linking_verb_indices(&self) -> impl Iterator + '_ { + self.iter_word_indices().filter(|idx| { + let word = self[*idx]; + let TokenKind::Word(word) = word.kind else { + panic!("Should be unreachable."); + }; + + word.is_linking_verb() + }) + } + + fn iter_linking_verbs(&self) -> impl Iterator + '_ { + self.iter_linking_verb_indices().map(|idx| self[idx]) + } + + fn iter_chunks(&self) -> impl Iterator + '_ { + let first_chunk = self + .iter_chunk_terminator_indices() + .next() + .map(|first_term| &self[0..=first_term]); + + let rest = self + .iter_chunk_terminator_indices() + .tuple_windows() + .map(move |(a, b)| &self[a + 1..=b]); + + let last = if let Some(last_i) = self.last_chunk_terminator_index() { + if last_i + 1 < self.len() { + Some(&self[last_i + 1..]) + } else { + None + } + } else { + Some(self) + }; + + first_chunk.into_iter().chain(rest).chain(last) + } + + fn iter_paragraphs(&self) -> impl Iterator + '_ { + let first_pg = self + .iter_paragraph_break_indices() + .next() + .map(|first_term| &self[0..=first_term]); + + let rest = self + .iter_paragraph_break_indices() + .tuple_windows() + .map(move |(a, b)| &self[a + 1..=b]); + + let last_pg = if let Some(last_i) = self.last_paragraph_break_index() { + if last_i + 1 < self.len() { + Some(&self[last_i + 1..]) + } else { + None + } + } else { + Some(self) + }; + + first_pg.into_iter().chain(rest).chain(last_pg) + } + + fn iter_sentences(&self) -> impl Iterator + '_ { + let first_sentence = self + .iter_sentence_terminator_indices() + .next() + .map(|first_term| &self[0..=first_term]); + + let rest = self + .iter_sentence_terminator_indices() + .tuple_windows() + .map(move |(a, b)| &self[a + 1..=b]); + + let last_sentence = if let Some(last_i) = self.last_sentence_terminator_index() { + if last_i + 1 < self.len() { + Some(&self[last_i + 1..]) + } else { + None + } + } else { + Some(self) + }; + + first_sentence.into_iter().chain(rest).chain(last_sentence) + } +}