From 78ce5a5a74d9d1a7a8032c1fdca9573d59d0a150 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Fri, 1 Nov 2024 21:41:41 -0600 Subject: [PATCH] fix(fst-dict): remove duplication by importing types from parsing crate Types include: - Span - CharString - WordMetadata --- harper-core/src/char_string.rs | 23 ---- harper-core/src/document.rs | 2 +- harper-core/src/lib.rs | 6 +- harper-core/src/linting/lint.rs | 2 +- harper-core/src/span.rs | 138 ------------------- harper-core/src/spell/fst_dictionary.rs | 3 +- harper-core/src/spell/merged_dictionary.rs | 3 +- harper-core/src/spell/mod.rs | 3 +- harper-core/src/token.rs | 2 +- harper-dictionary-parsing/src/char_string.rs | 18 +++ harper-dictionary-parsing/src/lib.rs | 4 +- 11 files changed, 28 insertions(+), 176 deletions(-) delete mode 100644 harper-core/src/char_string.rs delete mode 100644 harper-core/src/span.rs diff --git a/harper-core/src/char_string.rs b/harper-core/src/char_string.rs deleted file mode 100644 index 899f5bff..00000000 --- a/harper-core/src/char_string.rs +++ /dev/null @@ -1,23 +0,0 @@ -use smallvec::SmallVec; - -/// A char sequence that improves cache locality. -/// Most English words are fewer than 12 characters. -pub type CharString = SmallVec<[char; 12]>; - -pub trait CharStringExt { - fn to_lower(&self) -> CharString; - fn to_string(&self) -> String; -} - -impl CharStringExt for [char] { - fn to_lower(&self) -> CharString { - let mut out = CharString::with_capacity(self.len()); - - out.extend(self.iter().flat_map(|v| v.to_lowercase())); - - out - } - fn to_string(&self) -> String { - self.iter().collect() - } -} diff --git a/harper-core/src/document.rs b/harper-core/src/document.rs index 94a41491..2396e1ce 100644 --- a/harper-core/src/document.rs +++ b/harper-core/src/document.rs @@ -7,9 +7,9 @@ use paste::paste; use crate::parsers::{Markdown, Parser, PlainEnglish}; use crate::patterns::{PatternExt, RepeatingPattern, SequencePattern}; use crate::punctuation::Punctuation; -use crate::span::Span; use crate::token::NumberSuffix; use crate::vec_ext::VecExt; +use crate::Span; use crate::{Dictionary, FatToken, FstDictionary, Lrc, Token, TokenKind, TokenStringExt}; /// A document containing some amount of lexed and parsed English text. diff --git a/harper-core/src/lib.rs b/harper-core/src/lib.rs index 20e64877..ac55825b 100644 --- a/harper-core/src/lib.rs +++ b/harper-core/src/lib.rs @@ -2,7 +2,6 @@ #![allow(dead_code)] mod char_ext; -mod char_string; mod document; pub mod language_detection; mod lexing; @@ -11,7 +10,6 @@ mod mask; pub mod parsers; pub mod patterns; mod punctuation; -mod span; mod spell; mod sync; mod token; @@ -19,13 +17,13 @@ mod vec_ext; use std::collections::VecDeque; -pub use char_string::{CharString, CharStringExt}; pub use document::Document; +pub use harper_dictionary_parsing::char_string::{CharString, CharStringExt}; +pub use harper_dictionary_parsing::span::Span; pub use harper_dictionary_parsing::{word_metadata::Tense, WordMetadata}; use linting::Lint; pub use mask::{Mask, Masker}; pub use punctuation::{Punctuation, Quote}; -pub use span::Span; pub use spell::{Dictionary, FstDictionary, FullDictionary, MergedDictionary}; pub use sync::Lrc; pub use token::{FatToken, Token, TokenKind, TokenStringExt}; diff --git a/harper-core/src/linting/lint.rs b/harper-core/src/linting/lint.rs index 4ee8363f..8638d2b2 100644 --- a/harper-core/src/linting/lint.rs +++ b/harper-core/src/linting/lint.rs @@ -3,7 +3,7 @@ use std::fmt::Display; use is_macro::Is; use serde::{Deserialize, Serialize}; -use crate::span::Span; +use crate::Span; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Lint { diff --git a/harper-core/src/span.rs b/harper-core/src/span.rs deleted file mode 100644 index 042b910c..00000000 --- a/harper-core/src/span.rs +++ /dev/null @@ -1,138 +0,0 @@ -use std::ops::Range; - -use serde::{Deserialize, Serialize}; - -/// A window in a [`char`] sequence. -#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)] -pub struct Span { - pub start: usize, - pub end: usize, -} - -impl Span { - pub fn new(start: usize, end: usize) -> Self { - if start > end { - panic!("{} > {}", start, end); - } - Self { start, end } - } - - pub fn new_with_len(start: usize, len: usize) -> Self { - Self { - start, - end: start + len, - } - } - - pub fn len(&self) -> usize { - self.end - self.start - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn contains(&self, idx: usize) -> bool { - assert!(self.start <= self.end); - - self.start <= idx && idx < self.end - } - - pub fn overlaps_with(&self, other: Self) -> bool { - (self.start < other.end) && (other.start < self.end) - } - - /// Get the associated content. Will return [`None`] if any aspect is - /// invalid. - pub fn try_get_content<'a>(&self, source: &'a [char]) -> Option<&'a [char]> { - if (self.start > self.end) || (self.start >= source.len()) || (self.end > source.len()) { - if self.is_empty() { - return Some(&source[0..0]); - } - return None; - } - - Some(&source[self.start..self.end]) - } - - /// Get the associated content. Will panic if any aspect is invalid. - pub fn get_content<'a>(&self, source: &'a [char]) -> &'a [char] { - self.try_get_content(source).unwrap() - } - - pub fn get_content_string(&self, source: &[char]) -> String { - String::from_iter(self.get_content(source)) - } - - pub fn set_len(&mut self, length: usize) { - self.end = self.start + length; - } - - pub fn with_len(&self, length: usize) -> Self { - let mut cloned = *self; - cloned.set_len(length); - cloned - } - - // Add an amount to both [`Self::start`] and [`Self::end`] - pub fn push_by(&mut self, by: usize) { - self.start += by; - self.end += by; - } - - // Subtract an amount to both [`Self::start`] and [`Self::end`] - pub fn pull_by(&mut self, by: usize) { - self.start -= by; - self.end -= by; - } - - // Add an amount to a copy of both [`Self::start`] and [`Self::end`] - pub fn pushed_by(&self, by: usize) -> Self { - let mut clone = *self; - clone.start += by; - clone.end += by; - clone - } - - // Subtract an amount to a copy of both [`Self::start`] and [`Self::end`] - pub fn pulled_by(&self, by: usize) -> Self { - let mut clone = *self; - clone.start -= by; - clone.end -= by; - clone - } - - // Add an amount a copy of both [`Self::start`] and [`Self::end`] - pub fn with_offset(&self, by: usize) -> Self { - let mut clone = *self; - clone.push_by(by); - clone - } -} - -impl From> for Span { - fn from(value: Range) -> Self { - Self::new(value.start, value.end) - } -} - -impl From for Range { - fn from(value: Span) -> Self { - value.start..value.end - } -} - -#[cfg(test)] -mod tests { - use crate::Span; - - #[test] - fn overlaps() { - assert!(Span::new(0, 5).overlaps_with(Span::new(3, 6))); - assert!(Span::new(0, 5).overlaps_with(Span::new(2, 3))); - assert!(Span::new(0, 5).overlaps_with(Span::new(4, 5))); - assert!(Span::new(0, 5).overlaps_with(Span::new(4, 4))); - - assert!(!Span::new(0, 3).overlaps_with(Span::new(3, 5))); - } -} diff --git a/harper-core/src/spell/fst_dictionary.rs b/harper-core/src/spell/fst_dictionary.rs index 552e9fbd..3f8582a6 100644 --- a/harper-core/src/spell/fst_dictionary.rs +++ b/harper-core/src/spell/fst_dictionary.rs @@ -1,12 +1,11 @@ use super::{edit_distance_min_alloc, seq_to_normalized, FullDictionary}; use fst::Map as FstMap; use fst::{automaton::Levenshtein, IntoStreamer}; -use harper_dictionary_parsing::CharString; use hashbrown::HashMap; use itertools::Itertools; use std::sync::Arc; -use crate::{CharStringExt, WordMetadata}; +use crate::{CharString, CharStringExt, WordMetadata}; use super::Dictionary; diff --git a/harper-core/src/spell/merged_dictionary.rs b/harper-core/src/spell/merged_dictionary.rs index c26fa0cb..6569b3ab 100644 --- a/harper-core/src/spell/merged_dictionary.rs +++ b/harper-core/src/spell/merged_dictionary.rs @@ -1,9 +1,8 @@ -use harper_dictionary_parsing::CharString; use itertools::Itertools; use std::sync::Arc; use super::dictionary::Dictionary; -use crate::WordMetadata; +use crate::{CharString, WordMetadata}; /// A simple wrapper over [`Dictionary`] that allows /// one to merge multiple dictionaries without copying. diff --git a/harper-core/src/spell/mod.rs b/harper-core/src/spell/mod.rs index 1d9b96fb..b964f88a 100644 --- a/harper-core/src/spell/mod.rs +++ b/harper-core/src/spell/mod.rs @@ -1,9 +1,8 @@ use std::borrow::Cow; -use harper_dictionary_parsing::WordMetadata; use itertools::{Itertools, MinMaxResult}; -use crate::{CharString, CharStringExt}; +use crate::{CharString, CharStringExt, WordMetadata}; pub use self::dictionary::Dictionary; pub use self::fst_dictionary::FstDictionary; diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index 239c2c17..74efdcc5 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -5,7 +5,7 @@ use paste::paste; use serde::{Deserialize, Serialize}; use crate::punctuation::Punctuation; -use crate::span::Span; +use crate::Span; use crate::{Quote, WordMetadata}; use harper_dictionary_parsing::word_metadata::{ConjunctionData, NounData}; diff --git a/harper-dictionary-parsing/src/char_string.rs b/harper-dictionary-parsing/src/char_string.rs index c2d97033..899f5bff 100644 --- a/harper-dictionary-parsing/src/char_string.rs +++ b/harper-dictionary-parsing/src/char_string.rs @@ -3,3 +3,21 @@ use smallvec::SmallVec; /// A char sequence that improves cache locality. /// Most English words are fewer than 12 characters. pub type CharString = SmallVec<[char; 12]>; + +pub trait CharStringExt { + fn to_lower(&self) -> CharString; + fn to_string(&self) -> String; +} + +impl CharStringExt for [char] { + fn to_lower(&self) -> CharString { + let mut out = CharString::with_capacity(self.len()); + + out.extend(self.iter().flat_map(|v| v.to_lowercase())); + + out + } + fn to_string(&self) -> String { + self.iter().collect() + } +} diff --git a/harper-dictionary-parsing/src/lib.rs b/harper-dictionary-parsing/src/lib.rs index a57f10c7..f3f132f4 100644 --- a/harper-dictionary-parsing/src/lib.rs +++ b/harper-dictionary-parsing/src/lib.rs @@ -4,13 +4,13 @@ pub mod char_string; mod error; mod expansion; mod matcher; -mod span; +pub mod span; pub mod word_list; pub mod word_metadata; pub use attribute_list::AttributeList; use attribute_list::HumanReadableAttributeList; -pub use char_string::CharString; +pub use char_string::{CharString, CharStringExt}; pub use error::Error; pub use span::Span; pub use word_metadata::WordMetadata;