From 78ce5a5a74d9d1a7a8032c1fdca9573d59d0a150 Mon Sep 17 00:00:00 2001
From: Grant Lemons <grantlemons@aol.com>
Date: Fri, 1 Nov 2024 21:41:41 -0600
Subject: [PATCH] fix(fst-dict): remove duplication by importing types from
 parsing crate

Types include:
- Span
- CharString
- WordMetadata
---
 harper-core/src/char_string.rs               |  23 ----
 harper-core/src/document.rs                  |   2 +-
 harper-core/src/lib.rs                       |   6 +-
 harper-core/src/linting/lint.rs              |   2 +-
 harper-core/src/span.rs                      | 138 -------------------
 harper-core/src/spell/fst_dictionary.rs      |   3 +-
 harper-core/src/spell/merged_dictionary.rs   |   3 +-
 harper-core/src/spell/mod.rs                 |   3 +-
 harper-core/src/token.rs                     |   2 +-
 harper-dictionary-parsing/src/char_string.rs |  18 +++
 harper-dictionary-parsing/src/lib.rs         |   4 +-
 11 files changed, 28 insertions(+), 176 deletions(-)
 delete mode 100644 harper-core/src/char_string.rs
 delete mode 100644 harper-core/src/span.rs

diff --git a/harper-core/src/char_string.rs b/harper-core/src/char_string.rs
deleted file mode 100644
index 899f5bff..00000000
--- a/harper-core/src/char_string.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-use smallvec::SmallVec;
-
-/// A char sequence that improves cache locality.
-/// Most English words are fewer than 12 characters.
-pub type CharString = SmallVec<[char; 12]>;
-
-pub trait CharStringExt {
-    fn to_lower(&self) -> CharString;
-    fn to_string(&self) -> String;
-}
-
-impl CharStringExt for [char] {
-    fn to_lower(&self) -> CharString {
-        let mut out = CharString::with_capacity(self.len());
-
-        out.extend(self.iter().flat_map(|v| v.to_lowercase()));
-
-        out
-    }
-    fn to_string(&self) -> String {
-        self.iter().collect()
-    }
-}
diff --git a/harper-core/src/document.rs b/harper-core/src/document.rs
index 94a41491..2396e1ce 100644
--- a/harper-core/src/document.rs
+++ b/harper-core/src/document.rs
@@ -7,9 +7,9 @@ use paste::paste;
 use crate::parsers::{Markdown, Parser, PlainEnglish};
 use crate::patterns::{PatternExt, RepeatingPattern, SequencePattern};
 use crate::punctuation::Punctuation;
-use crate::span::Span;
 use crate::token::NumberSuffix;
 use crate::vec_ext::VecExt;
+use crate::Span;
 use crate::{Dictionary, FatToken, FstDictionary, Lrc, Token, TokenKind, TokenStringExt};
 
 /// A document containing some amount of lexed and parsed English text.
diff --git a/harper-core/src/lib.rs b/harper-core/src/lib.rs
index 20e64877..ac55825b 100644
--- a/harper-core/src/lib.rs
+++ b/harper-core/src/lib.rs
@@ -2,7 +2,6 @@
 #![allow(dead_code)]
 
 mod char_ext;
-mod char_string;
 mod document;
 pub mod language_detection;
 mod lexing;
@@ -11,7 +10,6 @@ mod mask;
 pub mod parsers;
 pub mod patterns;
 mod punctuation;
-mod span;
 mod spell;
 mod sync;
 mod token;
@@ -19,13 +17,13 @@ mod vec_ext;
 
 use std::collections::VecDeque;
 
-pub use char_string::{CharString, CharStringExt};
 pub use document::Document;
+pub use harper_dictionary_parsing::char_string::{CharString, CharStringExt};
+pub use harper_dictionary_parsing::span::Span;
 pub use harper_dictionary_parsing::{word_metadata::Tense, WordMetadata};
 use linting::Lint;
 pub use mask::{Mask, Masker};
 pub use punctuation::{Punctuation, Quote};
-pub use span::Span;
 pub use spell::{Dictionary, FstDictionary, FullDictionary, MergedDictionary};
 pub use sync::Lrc;
 pub use token::{FatToken, Token, TokenKind, TokenStringExt};
diff --git a/harper-core/src/linting/lint.rs b/harper-core/src/linting/lint.rs
index 4ee8363f..8638d2b2 100644
--- a/harper-core/src/linting/lint.rs
+++ b/harper-core/src/linting/lint.rs
@@ -3,7 +3,7 @@ use std::fmt::Display;
 use is_macro::Is;
 use serde::{Deserialize, Serialize};
 
-use crate::span::Span;
+use crate::Span;
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Lint {
diff --git a/harper-core/src/span.rs b/harper-core/src/span.rs
deleted file mode 100644
index 042b910c..00000000
--- a/harper-core/src/span.rs
+++ /dev/null
@@ -1,138 +0,0 @@
-use std::ops::Range;
-
-use serde::{Deserialize, Serialize};
-
-/// A window in a [`char`] sequence.
-#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default, PartialEq, Eq)]
-pub struct Span {
-    pub start: usize,
-    pub end: usize,
-}
-
-impl Span {
-    pub fn new(start: usize, end: usize) -> Self {
-        if start > end {
-            panic!("{} > {}", start, end);
-        }
-        Self { start, end }
-    }
-
-    pub fn new_with_len(start: usize, len: usize) -> Self {
-        Self {
-            start,
-            end: start + len,
-        }
-    }
-
-    pub fn len(&self) -> usize {
-        self.end - self.start
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-
-    pub fn contains(&self, idx: usize) -> bool {
-        assert!(self.start <= self.end);
-
-        self.start <= idx && idx < self.end
-    }
-
-    pub fn overlaps_with(&self, other: Self) -> bool {
-        (self.start < other.end) && (other.start < self.end)
-    }
-
-    /// Get the associated content. Will return [`None`] if any aspect is
-    /// invalid.
-    pub fn try_get_content<'a>(&self, source: &'a [char]) -> Option<&'a [char]> {
-        if (self.start > self.end) || (self.start >= source.len()) || (self.end > source.len()) {
-            if self.is_empty() {
-                return Some(&source[0..0]);
-            }
-            return None;
-        }
-
-        Some(&source[self.start..self.end])
-    }
-
-    /// Get the associated content. Will panic if any aspect is invalid.
-    pub fn get_content<'a>(&self, source: &'a [char]) -> &'a [char] {
-        self.try_get_content(source).unwrap()
-    }
-
-    pub fn get_content_string(&self, source: &[char]) -> String {
-        String::from_iter(self.get_content(source))
-    }
-
-    pub fn set_len(&mut self, length: usize) {
-        self.end = self.start + length;
-    }
-
-    pub fn with_len(&self, length: usize) -> Self {
-        let mut cloned = *self;
-        cloned.set_len(length);
-        cloned
-    }
-
-    // Add an amount to both [`Self::start`] and [`Self::end`]
-    pub fn push_by(&mut self, by: usize) {
-        self.start += by;
-        self.end += by;
-    }
-
-    // Subtract an amount to both [`Self::start`] and [`Self::end`]
-    pub fn pull_by(&mut self, by: usize) {
-        self.start -= by;
-        self.end -= by;
-    }
-
-    // Add an amount to a copy of both [`Self::start`] and [`Self::end`]
-    pub fn pushed_by(&self, by: usize) -> Self {
-        let mut clone = *self;
-        clone.start += by;
-        clone.end += by;
-        clone
-    }
-
-    // Subtract an amount to a copy of both [`Self::start`] and [`Self::end`]
-    pub fn pulled_by(&self, by: usize) -> Self {
-        let mut clone = *self;
-        clone.start -= by;
-        clone.end -= by;
-        clone
-    }
-
-    // Add an amount a copy of both [`Self::start`] and [`Self::end`]
-    pub fn with_offset(&self, by: usize) -> Self {
-        let mut clone = *self;
-        clone.push_by(by);
-        clone
-    }
-}
-
-impl From<Range<usize>> for Span {
-    fn from(value: Range<usize>) -> Self {
-        Self::new(value.start, value.end)
-    }
-}
-
-impl From<Span> for Range<usize> {
-    fn from(value: Span) -> Self {
-        value.start..value.end
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::Span;
-
-    #[test]
-    fn overlaps() {
-        assert!(Span::new(0, 5).overlaps_with(Span::new(3, 6)));
-        assert!(Span::new(0, 5).overlaps_with(Span::new(2, 3)));
-        assert!(Span::new(0, 5).overlaps_with(Span::new(4, 5)));
-        assert!(Span::new(0, 5).overlaps_with(Span::new(4, 4)));
-
-        assert!(!Span::new(0, 3).overlaps_with(Span::new(3, 5)));
-    }
-}
diff --git a/harper-core/src/spell/fst_dictionary.rs b/harper-core/src/spell/fst_dictionary.rs
index 552e9fbd..3f8582a6 100644
--- a/harper-core/src/spell/fst_dictionary.rs
+++ b/harper-core/src/spell/fst_dictionary.rs
@@ -1,12 +1,11 @@
 use super::{edit_distance_min_alloc, seq_to_normalized, FullDictionary};
 use fst::Map as FstMap;
 use fst::{automaton::Levenshtein, IntoStreamer};
-use harper_dictionary_parsing::CharString;
 use hashbrown::HashMap;
 use itertools::Itertools;
 use std::sync::Arc;
 
-use crate::{CharStringExt, WordMetadata};
+use crate::{CharString, CharStringExt, WordMetadata};
 
 use super::Dictionary;
 
diff --git a/harper-core/src/spell/merged_dictionary.rs b/harper-core/src/spell/merged_dictionary.rs
index c26fa0cb..6569b3ab 100644
--- a/harper-core/src/spell/merged_dictionary.rs
+++ b/harper-core/src/spell/merged_dictionary.rs
@@ -1,9 +1,8 @@
-use harper_dictionary_parsing::CharString;
 use itertools::Itertools;
 use std::sync::Arc;
 
 use super::dictionary::Dictionary;
-use crate::WordMetadata;
+use crate::{CharString, WordMetadata};
 
 /// A simple wrapper over [`Dictionary`] that allows
 /// one to merge multiple dictionaries without copying.
diff --git a/harper-core/src/spell/mod.rs b/harper-core/src/spell/mod.rs
index 1d9b96fb..b964f88a 100644
--- a/harper-core/src/spell/mod.rs
+++ b/harper-core/src/spell/mod.rs
@@ -1,9 +1,8 @@
 use std::borrow::Cow;
 
-use harper_dictionary_parsing::WordMetadata;
 use itertools::{Itertools, MinMaxResult};
 
-use crate::{CharString, CharStringExt};
+use crate::{CharString, CharStringExt, WordMetadata};
 
 pub use self::dictionary::Dictionary;
 pub use self::fst_dictionary::FstDictionary;
diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs
index 239c2c17..74efdcc5 100644
--- a/harper-core/src/token.rs
+++ b/harper-core/src/token.rs
@@ -5,7 +5,7 @@ use paste::paste;
 use serde::{Deserialize, Serialize};
 
 use crate::punctuation::Punctuation;
-use crate::span::Span;
+use crate::Span;
 use crate::{Quote, WordMetadata};
 use harper_dictionary_parsing::word_metadata::{ConjunctionData, NounData};
 
diff --git a/harper-dictionary-parsing/src/char_string.rs b/harper-dictionary-parsing/src/char_string.rs
index c2d97033..899f5bff 100644
--- a/harper-dictionary-parsing/src/char_string.rs
+++ b/harper-dictionary-parsing/src/char_string.rs
@@ -3,3 +3,21 @@ use smallvec::SmallVec;
 /// A char sequence that improves cache locality.
 /// Most English words are fewer than 12 characters.
 pub type CharString = SmallVec<[char; 12]>;
+
+pub trait CharStringExt {
+    fn to_lower(&self) -> CharString;
+    fn to_string(&self) -> String;
+}
+
+impl CharStringExt for [char] {
+    fn to_lower(&self) -> CharString {
+        let mut out = CharString::with_capacity(self.len());
+
+        out.extend(self.iter().flat_map(|v| v.to_lowercase()));
+
+        out
+    }
+    fn to_string(&self) -> String {
+        self.iter().collect()
+    }
+}
diff --git a/harper-dictionary-parsing/src/lib.rs b/harper-dictionary-parsing/src/lib.rs
index a57f10c7..f3f132f4 100644
--- a/harper-dictionary-parsing/src/lib.rs
+++ b/harper-dictionary-parsing/src/lib.rs
@@ -4,13 +4,13 @@ pub mod char_string;
 mod error;
 mod expansion;
 mod matcher;
-mod span;
+pub mod span;
 pub mod word_list;
 pub mod word_metadata;
 
 pub use attribute_list::AttributeList;
 use attribute_list::HumanReadableAttributeList;
-pub use char_string::CharString;
+pub use char_string::{CharString, CharStringExt};
 pub use error::Error;
 pub use span::Span;
 pub use word_metadata::WordMetadata;