diff --git a/Cargo.lock b/Cargo.lock
index e897a1cd..220535a3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -875,4 +875,5 @@ dependencies = [
"rstest",
"serde",
"tempfile",
+ "unicode-segmentation",
]
diff --git a/Cargo.toml b/Cargo.toml
index 61c00d0f..a40a07b9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,6 +27,7 @@ glob = "0.3.0"
ordered-float = "2.0.0"
serde = { version = "1.0.116", features = ["derive"] }
tempfile = "3.1.0"
+unicode-segmentation = "1.8.0"
[target.'cfg(windows)'.dependencies]
rand = { version = "0.8.4", features = [
diff --git a/contrib/completions/_zoxide b/contrib/completions/_zoxide
index 178a27d3..2fe0400d 100644
--- a/contrib/completions/_zoxide
+++ b/contrib/completions/_zoxide
@@ -68,8 +68,8 @@ _arguments "${_arguments_options[@]}" \
'(-l --list)--interactive[Use interactive selection]' \
'(-i --interactive)-l[List all matching directories]' \
'(-i --interactive)--list[List all matching directories]' \
-'(-i --interactive)-s[Print score with results]' \
-'(-i --interactive)--score[Print score with results]' \
+'(-i --interactive)-s[Print score with results (keyword match score, frequency score)]' \
+'(-i --interactive)--score[Print score with results (keyword match score, frequency score)]' \
'-h[Print help information]' \
'--help[Print help information]' \
'-V[Print version information]' \
diff --git a/contrib/completions/_zoxide.ps1 b/contrib/completions/_zoxide.ps1
index 72138e63..4a427e6a 100644
--- a/contrib/completions/_zoxide.ps1
+++ b/contrib/completions/_zoxide.ps1
@@ -64,8 +64,8 @@ Register-ArgumentCompleter -Native -CommandName 'zoxide' -ScriptBlock {
[CompletionResult]::new('--interactive', 'interactive', [CompletionResultType]::ParameterName, 'Use interactive selection')
[CompletionResult]::new('-l', 'l', [CompletionResultType]::ParameterName, 'List all matching directories')
[CompletionResult]::new('--list', 'list', [CompletionResultType]::ParameterName, 'List all matching directories')
- [CompletionResult]::new('-s', 's', [CompletionResultType]::ParameterName, 'Print score with results')
- [CompletionResult]::new('--score', 'score', [CompletionResultType]::ParameterName, 'Print score with results')
+ [CompletionResult]::new('-s', 's', [CompletionResultType]::ParameterName, 'Print score with results (keyword match score, frequency score)')
+ [CompletionResult]::new('--score', 'score', [CompletionResultType]::ParameterName, 'Print score with results (keyword match score, frequency score)')
[CompletionResult]::new('-h', 'h', [CompletionResultType]::ParameterName, 'Print help information')
[CompletionResult]::new('--help', 'help', [CompletionResultType]::ParameterName, 'Print help information')
[CompletionResult]::new('-V', 'V', [CompletionResultType]::ParameterName, 'Print version information')
diff --git a/contrib/completions/zoxide.elv b/contrib/completions/zoxide.elv
index dfdebc23..5783df10 100644
--- a/contrib/completions/zoxide.elv
+++ b/contrib/completions/zoxide.elv
@@ -58,8 +58,8 @@ set edit:completion:arg-completer[zoxide] = [@words]{
cand --interactive 'Use interactive selection'
cand -l 'List all matching directories'
cand --list 'List all matching directories'
- cand -s 'Print score with results'
- cand --score 'Print score with results'
+ cand -s 'Print score with results (keyword match score, frequency score)'
+ cand --score 'Print score with results (keyword match score, frequency score)'
cand -h 'Print help information'
cand --help 'Print help information'
cand -V 'Print version information'
diff --git a/contrib/completions/zoxide.fish b/contrib/completions/zoxide.fish
index 1ca8db01..3d4a6077 100644
--- a/contrib/completions/zoxide.fish
+++ b/contrib/completions/zoxide.fish
@@ -20,7 +20,7 @@ complete -c zoxide -n "__fish_seen_subcommand_from query" -l exclude -d 'Exclude
complete -c zoxide -n "__fish_seen_subcommand_from query" -l all -d 'Show deleted directories'
complete -c zoxide -n "__fish_seen_subcommand_from query" -s i -l interactive -d 'Use interactive selection'
complete -c zoxide -n "__fish_seen_subcommand_from query" -s l -l list -d 'List all matching directories'
-complete -c zoxide -n "__fish_seen_subcommand_from query" -s s -l score -d 'Print score with results'
+complete -c zoxide -n "__fish_seen_subcommand_from query" -s s -l score -d 'Print score with results (keyword match score, frequency score)'
complete -c zoxide -n "__fish_seen_subcommand_from query" -s h -l help -d 'Print help information'
complete -c zoxide -n "__fish_seen_subcommand_from query" -s V -l version -d 'Print version information'
complete -c zoxide -n "__fish_seen_subcommand_from remove" -s i -l interactive -r
diff --git a/contrib/completions/zoxide.ts b/contrib/completions/zoxide.ts
index 0c41a758..b86362a1 100644
--- a/contrib/completions/zoxide.ts
+++ b/contrib/completions/zoxide.ts
@@ -159,7 +159,7 @@ const completion: Fig.Spec = {
},
{
name: ["-s", "--score"],
- description: "Print score with results",
+ description: "Print score with results (keyword match score, frequency score)",
},
{
name: ["-h", "--help"],
diff --git a/src/app/_app.rs b/src/app/_app.rs
index e40e3e74..941358b9 100644
--- a/src/app/_app.rs
+++ b/src/app/_app.rs
@@ -111,7 +111,7 @@ pub struct Query {
#[clap(long, short, conflicts_with = "interactive")]
pub list: bool,
- /// Print score with results
+ /// Print score with results (keyword match score, frequency score)
#[clap(long, short, conflicts_with = "interactive")]
pub score: bool,
diff --git a/src/app/query.rs b/src/app/query.rs
index cb1de833..0fa5a725 100644
--- a/src/app/query.rs
+++ b/src/app/query.rs
@@ -30,17 +30,18 @@ impl Query {
stream = stream.with_exclude(path);
}
+ let mut stream = stream.into_iter();
if self.interactive {
let mut fzf = Fzf::new(false)?;
while let Some(dir) = stream.next() {
- writeln!(fzf.stdin(), "{}", dir.display_score(now)).pipe_exit("fzf")?;
+ writeln!(fzf.stdin(), "{}", dir.display_score(now, Some(&self.keywords))).pipe_exit("fzf")?;
}
let selection = fzf.wait_select()?;
if self.score {
print!("{}", selection);
} else {
- let path = selection.get(5..).context("could not read selection from fzf")?;
+ let path = selection.get(10..).context("could not read selection from fzf")?;
print!("{}", path);
}
} else if self.list {
@@ -48,7 +49,7 @@ impl Query {
let handle = &mut stdout.lock();
while let Some(dir) = stream.next() {
if self.score {
- writeln!(handle, "{}", dir.display_score(now))
+ writeln!(handle, "{}", dir.display_score(now, Some(&self.keywords)))
} else {
writeln!(handle, "{}", dir.display())
}
@@ -58,7 +59,7 @@ impl Query {
} else {
let dir = stream.next().context("no match found")?;
if self.score {
- writeln!(io::stdout(), "{}", dir.display_score(now))
+ writeln!(io::stdout(), "{}", dir.display_score(now, Some(&self.keywords)))
} else {
writeln!(io::stdout(), "{}", dir.display())
}
diff --git a/src/app/remove.rs b/src/app/remove.rs
index 18334712..b3b90ff4 100644
--- a/src/app/remove.rs
+++ b/src/app/remove.rs
@@ -18,15 +18,15 @@ impl Run for Remove {
match &self.interactive {
Some(keywords) => {
let now = util::current_time()?;
- let mut stream = db.stream(now).with_keywords(keywords);
+ let mut stream = db.stream(now).with_keywords(keywords).into_iter();
let mut fzf = Fzf::new(true)?;
while let Some(dir) = stream.next() {
- writeln!(fzf.stdin(), "{}", dir.display_score(now)).pipe_exit("fzf")?;
+ writeln!(fzf.stdin(), "{}", dir.display_score(now, Some(keywords))).pipe_exit("fzf")?;
}
selection = fzf.wait_select()?;
- let paths = selection.lines().filter_map(|line| line.get(5..));
+ let paths = selection.lines().filter_map(|line| line.get(10..));
for path in paths {
if !db.remove(path) {
bail!("path not found in database: {}", path);
diff --git a/src/db/dir.rs b/src/db/dir.rs
index 1661a1fe..8ff819d0 100644
--- a/src/db/dir.rs
+++ b/src/db/dir.rs
@@ -1,12 +1,15 @@
use std::borrow::Cow;
use std::fmt::{self, Display, Formatter};
use std::ops::{Deref, DerefMut};
+use std::path::PathBuf;
+use std::str::FromStr;
use anyhow::{bail, Context, Result};
use bincode::Options as _;
use serde::{Deserialize, Serialize};
+use unicode_segmentation::UnicodeSegmentation;
-#[derive(Debug, Deserialize, Serialize)]
+#[derive(Debug, Deserialize, Serialize, Default)]
pub struct DirList<'a>(#[serde(borrow)] pub Vec
>);
impl DirList<'_> {
@@ -88,14 +91,14 @@ pub struct Dir<'a> {
}
impl Dir<'_> {
- pub fn score(&self, now: Epoch) -> Rank {
+ pub fn score(&self, now: Epoch, keywords: &Vec) -> Score {
const HOUR: Epoch = 60 * 60;
const DAY: Epoch = 24 * HOUR;
const WEEK: Epoch = 7 * DAY;
// The older the entry, the lesser its importance.
let duration = now.saturating_sub(self.last_accessed);
- if duration < HOUR {
+ let adjusted_rank = if duration < HOUR {
self.rank * 4.0
} else if duration < DAY {
self.rank * 2.0
@@ -103,18 +106,146 @@ impl Dir<'_> {
self.rank * 0.5
} else {
self.rank * 0.25
+ };
+
+ for keyword in keywords {
+ debug_assert!(self.path.to_lowercase().contains(&keyword.to_lowercase()));
+ }
+
+ let mut kw_score_sum = 0;
+
+ let smart_case = keywords.iter().all(|kw| &kw.to_lowercase() == kw);
+
+ // Split the path into components, then words, so the "m" can be a better match
+ // for "folk music" than for "tom", and the best match for "music".
+ // And even more so if it's the last path component.
+ let path = PathBuf::from_str(&self.path).unwrap(); // safe because error is Infallible
+ let path_components = path.components();
+ let mut is_last_component = true;
+ for component in path_components.rev() {
+ let component = component.as_os_str().to_str().unwrap(); // safe because the path came from a string
+ let component = if smart_case { component.to_lowercase() } else { component.to_owned() };
+
+ let left_word_boundaries = left_word_boundaries(&component);
+ for keyword in keywords {
+ kw_score_sum +=
+ Self::compute_kw_score(&component, keyword, &left_word_boundaries, smart_case, is_last_component);
+ }
+ is_last_component = false;
}
+
+ (kw_score_sum, adjusted_rank)
+ }
+
+    /// Scores how well `keyword` matches the start of a word inside `path_component`.
+    ///
+    /// `left_word_boundaries` lists byte offsets into `path_component` where a word may
+    /// begin. Each offset is tried and the best-scoring one wins:
+    ///
+    /// * 105: the text at offset 0 starts with `keyword` as written;
+    /// * 100: the text at a later offset starts with `keyword` as written;
+    /// * 25: `smart_case` is off and the lowercased text starts with `keyword` as written;
+    /// * 20: the lowercased text starts with the lowercased `keyword`;
+    /// * 0: nothing matched at any offset.
+    ///
+    /// A non-zero score is raised by 5 when `is_last_component` is true. Offsets that
+    /// are out of range or not on a character boundary are skipped instead of panicking.
+    pub fn compute_kw_score(
+        path_component: &str,
+        keyword: &str,
+        left_word_boundaries: &[usize],
+        smart_case: bool,
+        is_last_component: bool,
+    ) -> u64 {
+        let keyword_lower = keyword.to_lowercase();
+
+        // more than one boundary can match, so keep the best score seen
+        let mut best_boundary_score: u64 = 0;
+        for &idx in left_word_boundaries {
+            // TODO: think carefully about these rules. Should the case of the match
+            // be allowed to influence the score? What if it's all lowercase, so
+            // a smart case match is impossible?
+            let word = match path_component.get(idx..) {
+                Some(word) => word,
+                None => continue,
+            };
+
+            let score = if word.starts_with(keyword) {
+                // exact match, but even better if it's at the leftmost position in the component,
+                // like "D" matching $HOME/Documents
+                //
+                // TODO: think about checking the right word boundary, and give extra points if it matches.
+                // Imagine two directories, src_3 and src. If src_3 is more frequently used, "sr" will
+                // match src_3. But "src" will match src.
+                if idx == 0 {
+                    105
+                } else {
+                    100
+                }
+            } else {
+                // Lowercase the tail itself rather than slicing a lowercased copy of the whole
+                // component: lowercasing can change byte lengths, so offsets into the original
+                // text are not valid offsets into the lowercased text.
+                let word_lower = word.to_lowercase();
+                if !smart_case && word_lower.starts_with(keyword) {
+                    // smart case is off (a keyword has case), but this keyword alone would be a
+                    // smart case match for the component.
+                    25
+                } else if word_lower.starts_with(keyword_lower.as_str()) {
+                    // wrong case but it's a match otherwise
+                    20
+                } else {
+                    // No score. We don't need to give any score for a keyword that matches but not
+                    // on a word boundary -- all paths being checked should at least match in that way.
+                    // But note that though the path will match the keyword, this path component may not.
+                    0
+                }
+            };
+            best_boundary_score = best_boundary_score.max(score);
+        }
+
+        if best_boundary_score > 0 && is_last_component {
+            // matches in the last path component should be considered a little better
+            best_boundary_score += 5;
+        }
+
+        best_boundary_score
+    }
pub fn display(&self) -> DirDisplay {
DirDisplay { dir: self }
}
- pub fn display_score(&self, now: Epoch) -> DirDisplayScore {
- DirDisplayScore { dir: self, now }
+ pub fn display_score(&self, now: Epoch, keywords: Option<&Vec>) -> DirDisplayScore {
+ DirDisplayScore { dir: self, now, keywords: keywords.map(|vec| vec.iter().cloned().collect()) }
}
}
+/// Returns byte indices that correspond to the leftmost position of each word.
+/// For input "hi there", the result will contain 0 and 3.
+///
+/// Besides the start of every Unicode word, an index is recorded inside a word
+/// wherever a caseless grapheme is followed by a cased one (`my_documents`) or a
+/// lower-case grapheme is followed by an upper-case one (`MyDocuments`).
+///
+/// The result may also contain extraneous indices.
+fn left_word_boundaries(text: &str) -> Vec<usize> {
+    // The boundary test below relies on the derived ordering of the variants:
+    // `None < LowerCase < UpperCase`.
+    #[derive(PartialEq, Clone, Copy, PartialOrd)]
+    enum Case {
+        None,
+        LowerCase,
+        UpperCase,
+    }
+
+    let mut boundaries = Vec::new();
+
+    // We won't need the words themselves because we want to do multi-word match.
+    // We need the whole string for that.
+    for (word_idx, word) in text.unicode_word_indices() {
+        boundaries.push(word_idx);
+
+        // Also search for case changes, and non-text characters:
+        // MyDocuments
+        // my_documents
+        // TODO: should "clap3b4" count as 4 words or 1?
+        let mut prev_case: Option<Case> = None;
+        for (grapheme_idx, grapheme) in word.grapheme_indices(true) {
+            let lower = grapheme.to_lowercase();
+            let upper = grapheme.to_uppercase();
+            let case = if lower == grapheme && upper == grapheme {
+                Case::None
+            } else if lower == grapheme {
+                Case::LowerCase
+            } else {
+                // Assume the other cases are upper case, because there might be more than
+                // one way to represent upper case
+                Case::UpperCase
+            };
+
+            // Consider this a word start if going from no case to any case,
+            // or lower case to upper case. The first grapheme of a word has no
+            // predecessor; its index was already pushed above.
+            if prev_case.map_or(false, |prev| case > prev) {
+                boundaries.push(word_idx + grapheme_idx);
+            }
+            prev_case = Some(case);
+        }
+    }
+
+    boundaries
+}
+
pub struct DirDisplay<'a> {
dir: &'a Dir<'a>,
}
@@ -128,11 +259,15 @@ impl Display for DirDisplay<'_> {
pub struct DirDisplayScore<'a> {
dir: &'a Dir<'a>,
now: Epoch,
+ keywords: Option>,
}
impl Display for DirDisplayScore<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
- let score = self.dir.score(self.now);
+ let no_keywords = Vec::default();
+ let keywords = self.keywords.as_ref().unwrap_or(&no_keywords);
+
+ let (kw_score, score) = self.dir.score(self.now, keywords);
let score = if score > 9999.0 {
9999
} else if score > 0.0 {
@@ -140,18 +275,19 @@ impl Display for DirDisplayScore<'_> {
} else {
0
};
- write!(f, "{:>4} {}", score, self.dir.path)
+ write!(f, "{:>4},{:>4} {}", kw_score, score, self.dir.path)
}
}
pub type Rank = f64;
+pub type Score = (u64, Rank);
pub type Epoch = u64;
#[cfg(test)]
mod tests {
- use std::borrow::Cow;
+ use std::{borrow::Cow, collections::HashSet};
- use super::{Dir, DirList};
+ use super::{left_word_boundaries, Dir, DirList};
#[test]
fn zero_copy() {
@@ -164,4 +300,32 @@ mod tests {
assert!(matches!(dir.path, Cow::Borrowed(_)))
}
}
+
+    #[test]
+    fn test_left_word_boundaries() {
+        // `left_word_boundaries` is allowed to return extra offsets, so several checks only
+        // require that every expected offset is present.
+        fn includes(text: &str, expected: &[usize]) -> bool {
+            let found: HashSet<usize> = left_word_boundaries(text).into_iter().collect();
+            expected.iter().all(|idx| found.contains(idx))
+        }
+
+        assert_eq!(left_word_boundaries(""), Vec::<usize>::new());
+        assert_eq!(left_word_boundaries("Hi"), vec![0]);
+
+        assert!(includes("hi there", &[0, 3]));
+        assert!(includes("hi_there", &[0, 3]));
+
+        // Offsets are byte offsets: "ü" and "µ" each take two bytes.
+        assert_eq!(left_word_boundaries("FürElise"), vec![0, 4]);
+        assert_eq!(left_word_boundaries("uTorrent"), vec![0, 1]);
+        assert_eq!(left_word_boundaries("µTorrent"), vec![0, 2]);
+
+        assert!(includes("/path/file.ext", &[1, 6, 11]));
+        assert!(includes(r"C:\path\file.ext", &[0, 3, 8, 13]));
+    }
}
diff --git a/src/db/stream.rs b/src/db/stream.rs
index e5d3eb98..0204134b 100644
--- a/src/db/stream.rs
+++ b/src/db/stream.rs
@@ -1,5 +1,3 @@
-use std::iter::Rev;
-use std::ops::Range;
use std::{fs, path};
use ordered_float::OrderedFloat;
@@ -9,7 +7,6 @@ use crate::util;
pub struct Stream<'db, 'file> {
db: &'db mut Database<'file>,
- idxs: Rev>,
keywords: Vec,
@@ -18,25 +15,22 @@ pub struct Stream<'db, 'file> {
resolve_symlinks: bool,
exclude_path: Option,
+ now: Epoch,
}
impl<'db, 'file> Stream<'db, 'file> {
pub fn new(db: &'db mut Database<'file>, now: Epoch) -> Self {
- // Iterate in descending order of score.
- db.dirs.sort_unstable_by_key(|dir| OrderedFloat(dir.score(now)));
- let idxs = (0..db.dirs.len()).rev();
-
// If a directory is deleted and hasn't been used for 90 days, delete it from the database.
let expire_below = now.saturating_sub(90 * 24 * 60 * 60);
Stream {
db,
- idxs,
keywords: Vec::new(),
check_exists: false,
expire_below,
resolve_symlinks: false,
exclude_path: None,
+ now,
}
}
@@ -56,31 +50,22 @@ impl<'db, 'file> Stream<'db, 'file> {
self
}
- pub fn next(&mut self) -> Option<&Dir<'file>> {
- while let Some(idx) = self.idxs.next() {
- let dir = &self.db.dirs[idx];
+ pub fn into_iter(self) -> StreamIterator<'db, 'file> {
+ let mut idxs: Vec<_> = self.db.dirs.iter()
+ .enumerate() // store the original indices before filtering
+ .filter(|(_idx, dir)|
+ self.matches_keywords(&dir.path) &&
+ Some(dir.path.as_ref()) != self.exclude_path.as_deref())
+ .collect();
- if !self.matches_keywords(&dir.path) {
- continue;
- }
-
- if !self.matches_exists(&dir.path) {
- if dir.last_accessed < self.expire_below {
- self.db.dirs.swap_remove(idx);
- self.db.modified = true;
- }
- continue;
- }
-
- if Some(dir.path.as_ref()) == self.exclude_path.as_deref() {
- continue;
- }
-
- let dir = &self.db.dirs[idx];
- return Some(dir);
- }
+ // Iterate in descending order of score.
+ idxs.sort_by_cached_key(|(_idx, dir)| {
+ let (kw_score, frequency_score) = dir.score(self.now, &self.keywords);
+ (kw_score, OrderedFloat(frequency_score))
+ });
+ let idxs = idxs.into_iter().map(|(idx, _)| idx).rev().collect::>().into_iter(); // copy the indices to avoid lifetime issues
- None
+ StreamIterator { stream: self, idxs: Box::new(idxs) }
}
fn matches_exists>(&self, path: S) -> bool {
@@ -120,6 +105,32 @@ impl<'db, 'file> Stream<'db, 'file> {
}
}
+/// Cursor over the directories selected by a [`Stream`], visited in the order that
+/// `Stream::into_iter` fixed when it built the index list.
+pub struct StreamIterator<'db, 'file> {
+    stream: Stream<'db, 'file>,
+    /// Positions in `stream.db.dirs` that are still to be visited.
+    idxs: Box<dyn Iterator<Item = usize>>,
+}
+
+impl<'db, 'file> StreamIterator<'db, 'file> {
+    /// Returns the next directory that passes `Stream::matches_exists`, or `None` once
+    /// every queued position has been visited.
+    ///
+    /// Entries that fail the check are skipped. If such an entry was last accessed
+    /// before `expire_below`, it is also removed from the database and the database
+    /// is marked as modified.
+    ///
+    /// This is an inherent method rather than an `Iterator` impl because the returned
+    /// reference borrows from `self`.
+    pub fn next(&mut self) -> Option<&Dir<'file>> {
+        while let Some(idx) = self.idxs.next() {
+            let dir = &self.stream.db.dirs[idx];
+
+            if !self.stream.matches_exists(&dir.path) {
+                if dir.last_accessed < self.stream.expire_below {
+                    let last = self.stream.db.dirs.len() - 1;
+                    self.stream.db.dirs.swap_remove(idx);
+                    self.stream.db.modified = true;
+
+                    // `swap_remove` moved the entry that used to sit at `last` into `idx`.
+                    // The queued positions were computed before the removal, so redirect
+                    // `last` to `idx`; otherwise a later lookup would index past the end
+                    // of the list.
+                    if idx != last {
+                        let exhausted: Box<dyn Iterator<Item = usize>> =
+                            Box::new(std::iter::empty::<usize>());
+                        let remaining = std::mem::replace(&mut self.idxs, exhausted);
+                        self.idxs = Box::new(remaining.map(move |i| if i == last { idx } else { i }));
+                    }
+                }
+                continue;
+            }
+
+            // Borrow again so the returned reference is independent of the check above.
+            let dir = &self.stream.db.dirs[idx];
+            return Some(dir);
+        }
+
+        None
+    }
+}
+
#[cfg(test)]
mod tests {
use std::path::PathBuf;