From d55f827545aa18af3d7a7e6fdfaef2a416373ed5 Mon Sep 17 00:00:00 2001 From: Dan Zwell Date: Fri, 17 Sep 2021 12:46:11 +0800 Subject: [PATCH 1/7] Make new iterator to delay sorting This prevents keywords or options from being added after sorting is done. So #260 can be implemented more safely. --- src/app/query.rs | 1 + src/app/remove.rs | 2 +- src/db/dir.rs | 2 +- src/db/stream.rs | 73 ++++++++++++++++++++++++++++------------------- 4 files changed, 46 insertions(+), 32 deletions(-) diff --git a/src/app/query.rs b/src/app/query.rs index cb1de833..8541189d 100644 --- a/src/app/query.rs +++ b/src/app/query.rs @@ -30,6 +30,7 @@ impl Query { stream = stream.with_exclude(path); } + let mut stream = stream.into_iter(); if self.interactive { let mut fzf = Fzf::new(false)?; while let Some(dir) = stream.next() { diff --git a/src/app/remove.rs b/src/app/remove.rs index 18334712..ba952c39 100644 --- a/src/app/remove.rs +++ b/src/app/remove.rs @@ -18,7 +18,7 @@ impl Run for Remove { match &self.interactive { Some(keywords) => { let now = util::current_time()?; - let mut stream = db.stream(now).with_keywords(keywords); + let mut stream = db.stream(now).with_keywords(keywords).into_iter(); let mut fzf = Fzf::new(true)?; while let Some(dir) = stream.next() { diff --git a/src/db/dir.rs b/src/db/dir.rs index 1661a1fe..c133a48b 100644 --- a/src/db/dir.rs +++ b/src/db/dir.rs @@ -6,7 +6,7 @@ use anyhow::{bail, Context, Result}; use bincode::Options as _; use serde::{Deserialize, Serialize}; -#[derive(Debug, Deserialize, Serialize)] +#[derive(Debug, Deserialize, Serialize, Default)] pub struct DirList<'a>(#[serde(borrow)] pub Vec>); impl DirList<'_> { diff --git a/src/db/stream.rs b/src/db/stream.rs index e5d3eb98..fa7d6621 100644 --- a/src/db/stream.rs +++ b/src/db/stream.rs @@ -9,7 +9,6 @@ use crate::util; pub struct Stream<'db, 'file> { db: &'db mut Database<'file>, - idxs: Rev>, keywords: Vec, @@ -18,25 +17,22 @@ pub struct Stream<'db, 'file> { resolve_symlinks: bool, 
exclude_path: Option, + now: Epoch, } impl<'db, 'file> Stream<'db, 'file> { pub fn new(db: &'db mut Database<'file>, now: Epoch) -> Self { - // Iterate in descending order of score. - db.dirs.sort_unstable_by_key(|dir| OrderedFloat(dir.score(now))); - let idxs = (0..db.dirs.len()).rev(); - // If a directory is deleted and hasn't been used for 90 days, delete it from the database. let expire_below = now.saturating_sub(90 * 24 * 60 * 60); Stream { db, - idxs, keywords: Vec::new(), check_exists: false, expire_below, resolve_symlinks: false, exclude_path: None, + now, } } @@ -56,31 +52,14 @@ impl<'db, 'file> Stream<'db, 'file> { self } - pub fn next(&mut self) -> Option<&Dir<'file>> { - while let Some(idx) = self.idxs.next() { - let dir = &self.db.dirs[idx]; - - if !self.matches_keywords(&dir.path) { - continue; - } - - if !self.matches_exists(&dir.path) { - if dir.last_accessed < self.expire_below { - self.db.dirs.swap_remove(idx); - self.db.modified = true; - } - continue; - } - - if Some(dir.path.as_ref()) == self.exclude_path.as_deref() { - continue; - } - - let dir = &self.db.dirs[idx]; - return Some(dir); - } + pub fn into_iter(self) -> StreamIterator<'db, 'file> { + let mut dirs = std::mem::take(&mut self.db.dirs); + // Iterate in descending order of score. 
+ dirs.sort_unstable_by_key(|dir| OrderedFloat(dir.score(self.now))); + let _ = std::mem::replace(&mut self.db.dirs, dirs); + let idxs = (0..self.db.dirs.len()).rev(); - None + StreamIterator { stream: self, idxs } } fn matches_exists>(&self, path: S) -> bool { @@ -120,6 +99,40 @@ impl<'db, 'file> Stream<'db, 'file> { } } +pub struct StreamIterator<'db, 'file> { + stream: Stream<'db, 'file>, + idxs: Rev>, +} + +impl<'db, 'file> StreamIterator<'db, 'file> { + pub fn next(&mut self) -> Option<&Dir<'file>> { + while let Some(idx) = self.idxs.next() { + let dir = &self.stream.db.dirs[idx]; + + if !self.stream.matches_keywords(&dir.path) { + continue; + } + + if !self.stream.matches_exists(&dir.path) { + if dir.last_accessed < self.stream.expire_below { + self.stream.db.dirs.swap_remove(idx); + self.stream.db.modified = true; + } + continue; + } + + if Some(dir.path.as_ref()) == self.stream.exclude_path.as_deref() { + continue; + } + + let dir = &self.stream.db.dirs[idx]; + return Some(dir); + } + + None + } +} + #[cfg(test)] mod tests { use std::path::PathBuf; From 6eba7d0d5673d4de395f514e2c6c19d37e2f7f07 Mon Sep 17 00:00:00 2001 From: Dan Zwell Date: Fri, 17 Sep 2021 18:41:25 +0800 Subject: [PATCH 2/7] Preliminary: score directories based on search keywords Keyword-based scoring is currently a noop. Directory filtering is done before scoring, except for a mutating filter that's complex to execute earlier. This is a step toward implementing #260. 
--- src/app/query.rs | 2 +- src/app/remove.rs | 2 +- src/db/dir.rs | 17 ++++++++++++----- src/db/stream.rs | 30 ++++++++++++++---------------- 4 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/app/query.rs b/src/app/query.rs index 8541189d..b888b6a2 100644 --- a/src/app/query.rs +++ b/src/app/query.rs @@ -41,7 +41,7 @@ impl Query { if self.score { print!("{}", selection); } else { - let path = selection.get(5..).context("could not read selection from fzf")?; + let path = selection.get(10..).context("could not read selection from fzf")?; print!("{}", path); } } else if self.list { diff --git a/src/app/remove.rs b/src/app/remove.rs index ba952c39..9175b2b8 100644 --- a/src/app/remove.rs +++ b/src/app/remove.rs @@ -26,7 +26,7 @@ impl Run for Remove { } selection = fzf.wait_select()?; - let paths = selection.lines().filter_map(|line| line.get(5..)); + let paths = selection.lines().filter_map(|line| line.get(10..)); for path in paths { if !db.remove(path) { bail!("path not found in database: {}", path); diff --git a/src/db/dir.rs b/src/db/dir.rs index c133a48b..1ef62af5 100644 --- a/src/db/dir.rs +++ b/src/db/dir.rs @@ -88,14 +88,14 @@ pub struct Dir<'a> { } impl Dir<'_> { - pub fn score(&self, now: Epoch) -> Rank { + pub fn score(&self, now: Epoch, _keywords: &Vec) -> Score { const HOUR: Epoch = 60 * 60; const DAY: Epoch = 24 * HOUR; const WEEK: Epoch = 7 * DAY; // The older the entry, the lesser its importance. let duration = now.saturating_sub(self.last_accessed); - if duration < HOUR { + let adjusted_rank = if duration < HOUR { self.rank * 4.0 } else if duration < DAY { self.rank * 2.0 @@ -103,7 +103,13 @@ impl Dir<'_> { self.rank * 0.5 } else { self.rank * 0.25 - } + }; + + // TODO: incorporate keywords into the scoring logic, so match quality is more significant + // than the access date. See issue #260. 
+ let kw_score = 1; + + (kw_score, adjusted_rank) } pub fn display(&self) -> DirDisplay { @@ -132,7 +138,7 @@ pub struct DirDisplayScore<'a> { impl Display for DirDisplayScore<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - let score = self.dir.score(self.now); + let (kw_score, score) = self.dir.score(self.now, &vec![]); let score = if score > 9999.0 { 9999 } else if score > 0.0 { @@ -140,11 +146,12 @@ impl Display for DirDisplayScore<'_> { } else { 0 }; - write!(f, "{:>4} {}", score, self.dir.path) + write!(f, "{:>4},{:>4} {}", kw_score, score, self.dir.path) } } pub type Rank = f64; +pub type Score = (u64, Rank); pub type Epoch = u64; #[cfg(test)] diff --git a/src/db/stream.rs b/src/db/stream.rs index fa7d6621..0204134b 100644 --- a/src/db/stream.rs +++ b/src/db/stream.rs @@ -1,5 +1,3 @@ -use std::iter::Rev; -use std::ops::Range; use std::{fs, path}; use ordered_float::OrderedFloat; @@ -53,13 +51,21 @@ impl<'db, 'file> Stream<'db, 'file> { } pub fn into_iter(self) -> StreamIterator<'db, 'file> { - let mut dirs = std::mem::take(&mut self.db.dirs); + let mut idxs: Vec<_> = self.db.dirs.iter() + .enumerate() // store the original indices before filtering + .filter(|(_idx, dir)| + self.matches_keywords(&dir.path) && + Some(dir.path.as_ref()) != self.exclude_path.as_deref()) + .collect(); + // Iterate in descending order of score. 
- dirs.sort_unstable_by_key(|dir| OrderedFloat(dir.score(self.now))); - let _ = std::mem::replace(&mut self.db.dirs, dirs); - let idxs = (0..self.db.dirs.len()).rev(); + idxs.sort_by_cached_key(|(_idx, dir)| { + let (kw_score, frequency_score) = dir.score(self.now, &self.keywords); + (kw_score, OrderedFloat(frequency_score)) + }); + let idxs = idxs.into_iter().map(|(idx, _)| idx).rev().collect::>().into_iter(); // copy the indices to avoid lifetime issues - StreamIterator { stream: self, idxs } + StreamIterator { stream: self, idxs: Box::new(idxs) } } fn matches_exists>(&self, path: S) -> bool { @@ -101,7 +107,7 @@ impl<'db, 'file> Stream<'db, 'file> { pub struct StreamIterator<'db, 'file> { stream: Stream<'db, 'file>, - idxs: Rev>, + idxs: Box>, } impl<'db, 'file> StreamIterator<'db, 'file> { @@ -109,10 +115,6 @@ impl<'db, 'file> StreamIterator<'db, 'file> { while let Some(idx) = self.idxs.next() { let dir = &self.stream.db.dirs[idx]; - if !self.stream.matches_keywords(&dir.path) { - continue; - } - if !self.stream.matches_exists(&dir.path) { if dir.last_accessed < self.stream.expire_below { self.stream.db.dirs.swap_remove(idx); @@ -121,10 +123,6 @@ impl<'db, 'file> StreamIterator<'db, 'file> { continue; } - if Some(dir.path.as_ref()) == self.stream.exclude_path.as_deref() { - continue; - } - let dir = &self.stream.db.dirs[idx]; return Some(dir); } From 21886e506cfc69efa3dd66899c1161c43cea77bd Mon Sep 17 00:00:00 2001 From: Dan Zwell Date: Mon, 13 Sep 2021 14:34:48 +0800 Subject: [PATCH 3/7] Use word boundary detection and give scores based on keyword matching I'm using a helper library to implement a unicode algorithm, but I'm also detecting case changes within a word (from lower to upper case, or no case to some case, so the "o" in "Documents" doesn't count as a new word). Words are not searched--rather, the string is searched starting at a word boundary. That way multi-word sequences will correctly match. This is a basic solution to #260. 
Some things to consider: - We don't have options to control the case. If smart-case is disabled, the weights in compute_kw_score need to change. - Right now keyword score totally overpowers the frequency score. The frequency score is only a tie-breaker. They could be normalized and weighted so a much better frequency score would win despite a slightly worse keyword score. - Should we detect word endings for exact matches? I'm not sure it would give a good user experience. If I frequently access "src9" but not "src", I don't want "src" to win just because it more exactly matches what I typed. It's hard to refrain from typing a whole word. This gives an interesting wrong result with "c" being a perfect match for "/mnt/c/anything". - The above issue can be solved if we consider digits to start a new word. - I'm testing these changes with `cargo r -- query --list --score b`. --- Cargo.lock | 1 + Cargo.toml | 1 + src/app/query.rs | 6 +- src/app/remove.rs | 2 +- src/db/dir.rs | 142 ++++++++++++++++++++++++++++++++++++++++++---- 5 files changed, 138 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e897a1cd..220535a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -875,4 +875,5 @@ dependencies = [ "rstest", "serde", "tempfile", + "unicode-segmentation", ] diff --git a/Cargo.toml b/Cargo.toml index 61c00d0f..a40a07b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ glob = "0.3.0" ordered-float = "2.0.0" serde = { version = "1.0.116", features = ["derive"] } tempfile = "3.1.0" +unicode-segmentation = "1.8.0" [target.'cfg(windows)'.dependencies] rand = { version = "0.8.4", features = [ diff --git a/src/app/query.rs b/src/app/query.rs index b888b6a2..0fa5a725 100644 --- a/src/app/query.rs +++ b/src/app/query.rs @@ -34,7 +34,7 @@ impl Query { if self.interactive { let mut fzf = Fzf::new(false)?; while let Some(dir) = stream.next() { - writeln!(fzf.stdin(), "{}", dir.display_score(now)).pipe_exit("fzf")?; + writeln!(fzf.stdin(), "{}", 
dir.display_score(now, Some(&self.keywords))).pipe_exit("fzf")?; } let selection = fzf.wait_select()?; @@ -49,7 +49,7 @@ impl Query { let handle = &mut stdout.lock(); while let Some(dir) = stream.next() { if self.score { - writeln!(handle, "{}", dir.display_score(now)) + writeln!(handle, "{}", dir.display_score(now, Some(&self.keywords))) } else { writeln!(handle, "{}", dir.display()) } @@ -59,7 +59,7 @@ impl Query { } else { let dir = stream.next().context("no match found")?; if self.score { - writeln!(io::stdout(), "{}", dir.display_score(now)) + writeln!(io::stdout(), "{}", dir.display_score(now, Some(&self.keywords))) } else { writeln!(io::stdout(), "{}", dir.display()) } diff --git a/src/app/remove.rs b/src/app/remove.rs index 9175b2b8..b3b90ff4 100644 --- a/src/app/remove.rs +++ b/src/app/remove.rs @@ -22,7 +22,7 @@ impl Run for Remove { let mut fzf = Fzf::new(true)?; while let Some(dir) = stream.next() { - writeln!(fzf.stdin(), "{}", dir.display_score(now)).pipe_exit("fzf")?; + writeln!(fzf.stdin(), "{}", dir.display_score(now, Some(keywords))).pipe_exit("fzf")?; } selection = fzf.wait_select()?; diff --git a/src/db/dir.rs b/src/db/dir.rs index 1ef62af5..7cf6934e 100644 --- a/src/db/dir.rs +++ b/src/db/dir.rs @@ -5,6 +5,7 @@ use std::ops::{Deref, DerefMut}; use anyhow::{bail, Context, Result}; use bincode::Options as _; use serde::{Deserialize, Serialize}; +use unicode_segmentation::UnicodeSegmentation; #[derive(Debug, Deserialize, Serialize, Default)] pub struct DirList<'a>(#[serde(borrow)] pub Vec>); @@ -88,7 +89,7 @@ pub struct Dir<'a> { } impl Dir<'_> { - pub fn score(&self, now: Epoch, _keywords: &Vec) -> Score { + pub fn score(&self, now: Epoch, keywords: &Vec) -> Score { const HOUR: Epoch = 60 * 60; const DAY: Epoch = 24 * HOUR; const WEEK: Epoch = 7 * DAY; @@ -105,22 +106,111 @@ impl Dir<'_> { self.rank * 0.25 }; - // TODO: incorporate keywords into the scoring logic, so match quality is more significant - // than the access date. See issue #260. 
- let kw_score = 1; + let left_word_boundaries = left_word_boundaries(&self.path); - (kw_score, adjusted_rank) + let mut kw_score_sum = 0; + for keyword in keywords { + kw_score_sum += self.compute_kw_score(keyword, &left_word_boundaries); + } + + (kw_score_sum, adjusted_rank) + } + + pub fn compute_kw_score(&self, keyword: &str, left_word_boundaries: &Vec) -> u64 { + let keyword_lower = &keyword.to_lowercase(); + let path_lower = self.path.to_lowercase(); + + // more than one boundary can match + let mut best_boundary_score = 0; + for idx in left_word_boundaries { + // TODO: think carefully about these rules. Should the case of the match + // be allowed to influence the score? What if it's all lowercase, so + // a smart case match is impossible? + let path = &self.path[*idx..]; + let path_lower = &path_lower[*idx..]; + if path.starts_with(keyword) { + // exact match + + // TODO: think about checking the right word boundary, and give extra points if it matches. + // Imagine two directories, src_3 and src. If src_3 is more frequently used, "sr" will + // match src_3. But "src" will match src. + best_boundary_score = best_boundary_score.max(100); + } else if path_lower.starts_with(keyword) { + // smart case match + best_boundary_score = best_boundary_score.max(90); + } else if path_lower.starts_with(keyword_lower) { + // wrong case but it's a match otherwise + best_boundary_score = best_boundary_score.max(20); + } + + // We don't need to give any score for a keyword that matches but not on a word boundary-- + // All paths being checked should at least match in that way. 
+ } + debug_assert!(path_lower.contains(keyword_lower)); + + best_boundary_score } pub fn display(&self) -> DirDisplay { DirDisplay { dir: self } } - pub fn display_score(&self, now: Epoch) -> DirDisplayScore { - DirDisplayScore { dir: self, now } + pub fn display_score(&self, now: Epoch, keywords: Option<&Vec>) -> DirDisplayScore { + DirDisplayScore { dir: self, now, keywords: keywords.map(|vec| vec.iter().cloned().collect()) } } } +/// Returns byte indices that correspond to the leftmost position of each word. +/// For input "hi there", the result will contain 0 and 3. +/// +/// The result may also contain extraneous indices. +fn left_word_boundaries(text: &str) -> Vec { + let mut boundaries = Vec::new(); + + #[derive(PartialEq, Clone, Copy, PartialOrd)] + enum Case { + None, + LowerCase, + UpperCase, + } + + // We won't need the words themselves because we want to do multi-word match. + // We need the whole string for that. + for (word_idx, word) in text.unicode_word_indices() { + boundaries.push(word_idx); + + // Also search for case changes, and non-text characters: + // MyDocuments + // my_documents + // TODO: should "clap3b4" count as 4 words or 1? + let mut prev_case = None; + for (grapheme_idx, grapheme) in word.grapheme_indices(true) { + let lower = grapheme.to_lowercase(); + let upper = grapheme.to_uppercase(); + let case = if lower == grapheme && upper == grapheme { + Case::None + } else if lower == grapheme { + Case::LowerCase + } else { + // Assume the other cases are upper case, because there might be more than + // one way to represent upper case + Case::UpperCase + }; + + if let Some(prev_case) = &prev_case { + if case > *prev_case { + // Consider this a word start if going from no case to any case, + // or lower case to upper case. 
+ boundaries.push(word_idx + grapheme_idx); + } + } + let _ = prev_case.replace(case); + } + } + + boundaries +} + pub struct DirDisplay<'a> { dir: &'a Dir<'a>, } @@ -134,11 +224,15 @@ impl Display for DirDisplay<'_> { pub struct DirDisplayScore<'a> { dir: &'a Dir<'a>, now: Epoch, + keywords: Option>, } impl Display for DirDisplayScore<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - let (kw_score, score) = self.dir.score(self.now, &vec![]); + let no_keywords = Vec::default(); + let keywords = self.keywords.as_ref().unwrap_or(&no_keywords); + + let (kw_score, score) = self.dir.score(self.now, keywords); let score = if score > 9999.0 { 9999 } else if score > 0.0 { @@ -156,9 +250,9 @@ pub type Epoch = u64; #[cfg(test)] mod tests { - use std::borrow::Cow; + use std::{borrow::Cow, collections::HashSet}; - use super::{Dir, DirList}; + use super::{left_word_boundaries, Dir, DirList}; #[test] fn zero_copy() { @@ -171,4 +265,32 @@ mod tests { assert!(matches!(dir.path, Cow::Borrowed(_))) } } + + #[test] + fn test_left_word_boundaries() { + assert!(left_word_boundaries("") == vec![]); + assert!(left_word_boundaries("Hi") == vec![0]); + + assert!(vec![0, 3] + .into_iter() + .collect::>() + .is_subset(&left_word_boundaries("hi there").into_iter().collect())); + assert!(vec![0, 3] + .into_iter() + .collect::>() + .is_subset(&left_word_boundaries("hi_there").into_iter().collect())); + + assert!(vec![0, 4] == left_word_boundaries("FürElise")); + assert!(vec![0, 1] == left_word_boundaries("uTorrent")); + assert!(vec![0, 2] == left_word_boundaries("µTorrent")); + + assert!(vec![1, 6, 11] + .into_iter() + .collect::>() + .is_subset(&left_word_boundaries("/path/file.ext").into_iter().collect())); + assert!(vec![0, 3, 8, 13] + .into_iter() + .collect::>() + .is_subset(&left_word_boundaries(r"C:\path\file.ext").into_iter().collect())); + } } From 23136619b3bc423351d4cb6429fe2fffd96a1dff Mon Sep 17 00:00:00 2001 From: Dan Zwell Date: Fri, 26 Nov 2021 13:51:59 +0800 
Subject: [PATCH 4/7] Update documentation and completion strings: describe the score format. --- contrib/completions/_zoxide | 4 ++-- contrib/completions/_zoxide.ps1 | 4 ++-- contrib/completions/zoxide.elv | 4 ++-- contrib/completions/zoxide.fish | 2 +- contrib/completions/zoxide.ts | 2 +- src/app/_app.rs | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/contrib/completions/_zoxide b/contrib/completions/_zoxide index 178a27d3..2fe0400d 100644 --- a/contrib/completions/_zoxide +++ b/contrib/completions/_zoxide @@ -68,8 +68,8 @@ _arguments "${_arguments_options[@]}" \ '(-l --list)--interactive[Use interactive selection]' \ '(-i --interactive)-l[List all matching directories]' \ '(-i --interactive)--list[List all matching directories]' \ -'(-i --interactive)-s[Print score with results]' \ -'(-i --interactive)--score[Print score with results]' \ +'(-i --interactive)-s[Print score with results (keyword match score, frequency score)]' \ +'(-i --interactive)--score[Print score with results (keyword match score, frequency score)]' \ '-h[Print help information]' \ '--help[Print help information]' \ '-V[Print version information]' \ diff --git a/contrib/completions/_zoxide.ps1 b/contrib/completions/_zoxide.ps1 index 72138e63..4a427e6a 100644 --- a/contrib/completions/_zoxide.ps1 +++ b/contrib/completions/_zoxide.ps1 @@ -64,8 +64,8 @@ Register-ArgumentCompleter -Native -CommandName 'zoxide' -ScriptBlock { [CompletionResult]::new('--interactive', 'interactive', [CompletionResultType]::ParameterName, 'Use interactive selection') [CompletionResult]::new('-l', 'l', [CompletionResultType]::ParameterName, 'List all matching directories') [CompletionResult]::new('--list', 'list', [CompletionResultType]::ParameterName, 'List all matching directories') - [CompletionResult]::new('-s', 's', [CompletionResultType]::ParameterName, 'Print score with results') - [CompletionResult]::new('--score', 'score', [CompletionResultType]::ParameterName, 'Print score with results') 
+ [CompletionResult]::new('-s', 's', [CompletionResultType]::ParameterName, 'Print score with results (keyword match score, frequency score)') + [CompletionResult]::new('--score', 'score', [CompletionResultType]::ParameterName, 'Print score with results (keyword match score, frequency score)') [CompletionResult]::new('-h', 'h', [CompletionResultType]::ParameterName, 'Print help information') [CompletionResult]::new('--help', 'help', [CompletionResultType]::ParameterName, 'Print help information') [CompletionResult]::new('-V', 'V', [CompletionResultType]::ParameterName, 'Print version information') diff --git a/contrib/completions/zoxide.elv b/contrib/completions/zoxide.elv index dfdebc23..5783df10 100644 --- a/contrib/completions/zoxide.elv +++ b/contrib/completions/zoxide.elv @@ -58,8 +58,8 @@ set edit:completion:arg-completer[zoxide] = [@words]{ cand --interactive 'Use interactive selection' cand -l 'List all matching directories' cand --list 'List all matching directories' - cand -s 'Print score with results' - cand --score 'Print score with results' + cand -s 'Print score with results (keyword match score, frequency score)' + cand --score 'Print score with results (keyword match score, frequency score)' cand -h 'Print help information' cand --help 'Print help information' cand -V 'Print version information' diff --git a/contrib/completions/zoxide.fish b/contrib/completions/zoxide.fish index 1ca8db01..3d4a6077 100644 --- a/contrib/completions/zoxide.fish +++ b/contrib/completions/zoxide.fish @@ -20,7 +20,7 @@ complete -c zoxide -n "__fish_seen_subcommand_from query" -l exclude -d 'Exclude complete -c zoxide -n "__fish_seen_subcommand_from query" -l all -d 'Show deleted directories' complete -c zoxide -n "__fish_seen_subcommand_from query" -s i -l interactive -d 'Use interactive selection' complete -c zoxide -n "__fish_seen_subcommand_from query" -s l -l list -d 'List all matching directories' -complete -c zoxide -n "__fish_seen_subcommand_from query" -s s -l 
score -d 'Print score with results' +complete -c zoxide -n "__fish_seen_subcommand_from query" -s s -l score -d 'Print score with results (keyword match score, frequency score)' complete -c zoxide -n "__fish_seen_subcommand_from query" -s h -l help -d 'Print help information' complete -c zoxide -n "__fish_seen_subcommand_from query" -s V -l version -d 'Print version information' complete -c zoxide -n "__fish_seen_subcommand_from remove" -s i -l interactive -r diff --git a/contrib/completions/zoxide.ts b/contrib/completions/zoxide.ts index 0c41a758..b86362a1 100644 --- a/contrib/completions/zoxide.ts +++ b/contrib/completions/zoxide.ts @@ -159,7 +159,7 @@ const completion: Fig.Spec = { }, { name: ["-s", "--score"], - description: "Print score with results", + description: "Print score with results (keyword match score, frequency score)", }, { name: ["-h", "--help"], diff --git a/src/app/_app.rs b/src/app/_app.rs index e40e3e74..941358b9 100644 --- a/src/app/_app.rs +++ b/src/app/_app.rs @@ -111,7 +111,7 @@ pub struct Query { #[clap(long, short, conflicts_with = "interactive")] pub list: bool, - /// Print score with results + /// Print score with results (keyword match score, frequency score) #[clap(long, short, conflicts_with = "interactive")] pub score: bool, From f94db841a923f20650c8d798dbe361828ec982c9 Mon Sep 17 00:00:00 2001 From: Dan Zwell Date: Sat, 27 Nov 2021 11:06:23 +0800 Subject: [PATCH 5/7] Matches in the last path component should be considered a little better than other matches. 
--- src/db/dir.rs | 52 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/src/db/dir.rs b/src/db/dir.rs index 7cf6934e..29bca175 100644 --- a/src/db/dir.rs +++ b/src/db/dir.rs @@ -1,6 +1,8 @@ use std::borrow::Cow; use std::fmt::{self, Display, Formatter}; use std::ops::{Deref, DerefMut}; +use std::path::PathBuf; +use std::str::FromStr; use anyhow::{bail, Context, Result}; use bincode::Options as _; @@ -106,19 +108,38 @@ impl Dir<'_> { self.rank * 0.25 }; - let left_word_boundaries = left_word_boundaries(&self.path); + for keyword in keywords { + debug_assert!(self.path.to_lowercase().contains(&keyword.to_lowercase())); + } let mut kw_score_sum = 0; - for keyword in keywords { - kw_score_sum += self.compute_kw_score(keyword, &left_word_boundaries); + + // Split the path into components, then words, so the "M" can be a better match + // for "folk music" than for "tom", and the best match for "music". + // And even more so if it's the last path component. 
+ let path = PathBuf::from_str(&self.path).unwrap(); // safe because error is Infallible + let path_components = path.components(); + let mut is_last_component = true; + for component in path_components.rev() { + let component = component.as_os_str().to_str().unwrap(); // safe because the path came from a string + let left_word_boundaries = left_word_boundaries(&component); + for keyword in keywords { + kw_score_sum += Self::compute_kw_score(&component, keyword, &left_word_boundaries, is_last_component); + } + is_last_component = false; } (kw_score_sum, adjusted_rank) } - pub fn compute_kw_score(&self, keyword: &str, left_word_boundaries: &Vec) -> u64 { + pub fn compute_kw_score( + path_component: &str, + keyword: &str, + left_word_boundaries: &Vec, + is_last_component: bool, + ) -> u64 { let keyword_lower = &keyword.to_lowercase(); - let path_lower = self.path.to_lowercase(); + let path_lower = path_component.to_lowercase(); // more than one boundary can match let mut best_boundary_score = 0; @@ -126,27 +147,32 @@ impl Dir<'_> { // TODO: think carefully about these rules. Should the case of the match // be allowed to influence the score? What if it's all lowercase, so // a smart case match is impossible? - let path = &self.path[*idx..]; - let path_lower = &path_lower[*idx..]; - if path.starts_with(keyword) { + let word = &path_component[*idx..]; + let word_lower = &path_lower[*idx..]; + if word.starts_with(keyword) { // exact match // TODO: think about checking the right word boundary, and give extra points if it matches. // Imagine two directories, src_3 and src. If src_3 is more frequently used, "sr" will // match src_3. But "src" will match src. 
best_boundary_score = best_boundary_score.max(100); - } else if path_lower.starts_with(keyword) { + } else if word_lower.starts_with(keyword) { // smart case match best_boundary_score = best_boundary_score.max(90); - } else if path_lower.starts_with(keyword_lower) { + } else if word_lower.starts_with(keyword_lower) { // wrong case but it's a match otherwise best_boundary_score = best_boundary_score.max(20); + } else { + // No score. We don't need to give any score for a keyword that matches but not on a word boundary-- + // All paths being checked should at least match in that way. + // But note that though the path will match the keyword, this path component may not match. } + } - // We don't need to give any score for a keyword that matches but not on a word boundary-- - // All paths being checked should at least match in that way. + if best_boundary_score > 0 && is_last_component { + // matches in the last path component should be considered a little better + best_boundary_score += 5; } - debug_assert!(path_lower.contains(keyword_lower)); best_boundary_score } From 8d59bd5530f99babac017dd9368d5e7662875305 Mon Sep 17 00:00:00 2001 From: Dan Zwell Date: Sat, 27 Nov 2021 11:09:35 +0800 Subject: [PATCH 6/7] Consider leftmost matching within a component to be best So when searching 'd', ~/documents matches better than ~/my-documents. --- src/db/dir.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/db/dir.rs b/src/db/dir.rs index 29bca175..0fd6a1d2 100644 --- a/src/db/dir.rs +++ b/src/db/dir.rs @@ -150,12 +150,14 @@ impl Dir<'_> { let word = &path_component[*idx..]; let word_lower = &path_lower[*idx..]; if word.starts_with(keyword) { - // exact match + // exact match, but even better if it's at the leftmost position in the component, + // like "D" matching $HOME/Documents + let score = if *idx == 0 { 105 } else { 100 }; // TODO: think about checking the right word boundary, and give extra points if it matches. 
// Imagine two directories, src_3 and src. If src_3 is more frequently used, "sr" will // match src_3. But "src" will match src. - best_boundary_score = best_boundary_score.max(100); + best_boundary_score = best_boundary_score.max(score); } else if word_lower.starts_with(keyword) { // smart case match best_boundary_score = best_boundary_score.max(90); From a260d922ed92090f64b9849e4e282ab2ccd11ffd Mon Sep 17 00:00:00 2001 From: Dan Zwell Date: Sat, 27 Nov 2021 11:32:33 +0800 Subject: [PATCH 7/7] Implement smart case matching A future option should be to turn off smart case, but that's not a priority, for the reasons mentioned here: https://github.com/ajeetdsouza/zoxide/issues/224 --- src/db/dir.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/db/dir.rs b/src/db/dir.rs index 0fd6a1d2..8ff819d0 100644 --- a/src/db/dir.rs +++ b/src/db/dir.rs @@ -114,7 +114,9 @@ impl Dir<'_> { let mut kw_score_sum = 0; - // Split the path into components, then words, so the "M" can be a better match + let smart_case = keywords.iter().all(|kw| &kw.to_lowercase() == kw); + + // Split the path into components, then words, so the "m" can be a better match // for "folk music" than for "tom", and the best match for "music". // And even more so if it's the last path component. 
let path = PathBuf::from_str(&self.path).unwrap(); // safe because error is Infallible @@ -122,9 +124,12 @@ impl Dir<'_> { let mut is_last_component = true; for component in path_components.rev() { let component = component.as_os_str().to_str().unwrap(); // safe because the path came from a string + let component = if smart_case { component.to_lowercase() } else { component.to_owned() }; + let left_word_boundaries = left_word_boundaries(&component); for keyword in keywords { - kw_score_sum += Self::compute_kw_score(&component, keyword, &left_word_boundaries, is_last_component); + kw_score_sum += + Self::compute_kw_score(&component, keyword, &left_word_boundaries, smart_case, is_last_component); } is_last_component = false; } @@ -136,6 +141,7 @@ impl Dir<'_> { path_component: &str, keyword: &str, left_word_boundaries: &Vec, + smart_case: bool, is_last_component: bool, ) -> u64 { let keyword_lower = &keyword.to_lowercase(); @@ -158,9 +164,10 @@ impl Dir<'_> { // Imagine two directories, src_3 and src. If src_3 is more frequently used, "sr" will // match src_3. But "src" will match src. best_boundary_score = best_boundary_score.max(score); - } else if word_lower.starts_with(keyword) { - // smart case match - best_boundary_score = best_boundary_score.max(90); + } else if !smart_case && word_lower.starts_with(keyword) { + // smart case is off (a keyword has case), but this keyword alone would be a smart case match + // for the component. + best_boundary_score = best_boundary_score.max(25); } else if word_lower.starts_with(keyword_lower) { // wrong case but it's a match otherwise best_boundary_score = best_boundary_score.max(20);