Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Score directories based on the keywords searched #264

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ glob = "0.3.0"
ordered-float = "2.0.0"
serde = { version = "1.0.116", features = ["derive"] }
tempfile = "3.1.0"
unicode-segmentation = "1.8.0"

[target.'cfg(windows)'.dependencies]
rand = { version = "0.8.4", features = [
Expand Down
4 changes: 2 additions & 2 deletions contrib/completions/_zoxide

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions contrib/completions/_zoxide.ps1

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions contrib/completions/zoxide.elv

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion contrib/completions/zoxide.fish

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion contrib/completions/zoxide.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/app/_app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ pub struct Query {
#[clap(long, short, conflicts_with = "interactive")]
pub list: bool,

/// Print score with results
/// Print score with results (keyword match score, frequency score)
#[clap(long, short, conflicts_with = "interactive")]
pub score: bool,

Expand Down
9 changes: 5 additions & 4 deletions src/app/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,26 @@ impl Query {
stream = stream.with_exclude(path);
}

let mut stream = stream.into_iter();
if self.interactive {
let mut fzf = Fzf::new(false)?;
while let Some(dir) = stream.next() {
writeln!(fzf.stdin(), "{}", dir.display_score(now)).pipe_exit("fzf")?;
writeln!(fzf.stdin(), "{}", dir.display_score(now, Some(&self.keywords))).pipe_exit("fzf")?;
}

let selection = fzf.wait_select()?;
if self.score {
print!("{}", selection);
} else {
let path = selection.get(5..).context("could not read selection from fzf")?;
let path = selection.get(10..).context("could not read selection from fzf")?;
print!("{}", path);
}
} else if self.list {
let stdout = io::stdout();
let handle = &mut stdout.lock();
while let Some(dir) = stream.next() {
if self.score {
writeln!(handle, "{}", dir.display_score(now))
writeln!(handle, "{}", dir.display_score(now, Some(&self.keywords)))
} else {
writeln!(handle, "{}", dir.display())
}
Expand All @@ -58,7 +59,7 @@ impl Query {
} else {
let dir = stream.next().context("no match found")?;
if self.score {
writeln!(io::stdout(), "{}", dir.display_score(now))
writeln!(io::stdout(), "{}", dir.display_score(now, Some(&self.keywords)))
} else {
writeln!(io::stdout(), "{}", dir.display())
}
Expand Down
6 changes: 3 additions & 3 deletions src/app/remove.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ impl Run for Remove {
match &self.interactive {
Some(keywords) => {
let now = util::current_time()?;
let mut stream = db.stream(now).with_keywords(keywords);
let mut stream = db.stream(now).with_keywords(keywords).into_iter();

let mut fzf = Fzf::new(true)?;
while let Some(dir) = stream.next() {
writeln!(fzf.stdin(), "{}", dir.display_score(now)).pipe_exit("fzf")?;
writeln!(fzf.stdin(), "{}", dir.display_score(now, Some(keywords))).pipe_exit("fzf")?;
}

selection = fzf.wait_select()?;
let paths = selection.lines().filter_map(|line| line.get(5..));
let paths = selection.lines().filter_map(|line| line.get(10..));
for path in paths {
if !db.remove(path) {
bail!("path not found in database: {}", path);
Expand Down
182 changes: 173 additions & 9 deletions src/db/dir.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
use std::borrow::Cow;
use std::fmt::{self, Display, Formatter};
use std::ops::{Deref, DerefMut};
use std::path::PathBuf;
use std::str::FromStr;

use anyhow::{bail, Context, Result};
use bincode::Options as _;
use serde::{Deserialize, Serialize};
use unicode_segmentation::UnicodeSegmentation;

/// Newtype around the list of `Dir` entries. The `#[serde(borrow)]` attribute lets
/// deserialized entries borrow from the input buffer instead of copying it.
// NOTE(review): the two `derive` lines below are the removed and the added side of the diff;
// the added one additionally derives `Default`.
#[derive(Debug, Deserialize, Serialize)]
#[derive(Debug, Deserialize, Serialize, Default)]
pub struct DirList<'a>(#[serde(borrow)] pub Vec<Dir<'a>>);

impl DirList<'_> {
Expand Down Expand Up @@ -88,33 +91,161 @@ pub struct Dir<'a> {
}

impl Dir<'_> {
/// Scores this directory for a query made at time `now`.
///
/// Returns `(kw_score_sum, adjusted_rank)`:
/// * `adjusted_rank` is `self.rank` scaled by how long ago the entry was last accessed
///   (x4 within an hour, x2 within a day, x0.5 within a week, x0.25 otherwise);
/// * `kw_score_sum` is the sum of `compute_kw_score` over every path component and every
///   keyword.
// NOTE(review): this span is taken from a diff view, so removed and added lines appear side by
// side (the two `pub fn score` signatures and the two `duration < HOUR` lines).
pub fn score(&self, now: Epoch) -> Rank {
pub fn score(&self, now: Epoch, keywords: &Vec<String>) -> Score {
const HOUR: Epoch = 60 * 60;
const DAY: Epoch = 24 * HOUR;
const WEEK: Epoch = 7 * DAY;

// The older the entry, the lesser its importance.
// `saturating_sub` yields 0 instead of underflowing when `last_accessed` is later than `now`.
let duration = now.saturating_sub(self.last_accessed);
if duration < HOUR {
let adjusted_rank = if duration < HOUR {
self.rank * 4.0
} else if duration < DAY {
self.rank * 2.0
} else if duration < WEEK {
self.rank * 0.5
} else {
self.rank * 0.25
};

// Debug-build check only: every keyword is expected to occur somewhere in the path,
// ignoring case.
for keyword in keywords {
debug_assert!(self.path.to_lowercase().contains(&keyword.to_lowercase()));
}

let mut kw_score_sum = 0;

// True when every keyword is equal to its own lowercase form. The path components are
// lowercased before matching in that case.
let smart_case = keywords.iter().all(|kw| &kw.to_lowercase() == kw);

// Split the path into components, then words, so the "m" can be a better match
// for "folk music" than for "tom", and the best match for "music".
// And even more so if it's the last path component.
let path = PathBuf::from_str(&self.path).unwrap(); // safe because error is Infallible
let path_components = path.components();
// Components are visited in reverse, so the first one seen is the last path component.
let mut is_last_component = true;
for component in path_components.rev() {
let component = component.as_os_str().to_str().unwrap(); // safe because the path came from a string
let component = if smart_case { component.to_lowercase() } else { component.to_owned() };

// Word-start offsets are computed once per component and shared by all keywords.
let left_word_boundaries = left_word_boundaries(&component);
for keyword in keywords {
kw_score_sum +=
Self::compute_kw_score(&component, keyword, &left_word_boundaries, smart_case, is_last_component);
}
is_last_component = false;
}

(kw_score_sum, adjusted_rank)
}

/// Scores how well `keyword` matches one path component at its word boundaries.
///
/// `left_word_boundaries` holds byte offsets into `path_component` at which a word starts.
/// Every offset is tried and the best result is kept:
///
/// * 105 - the component itself starts with `keyword` (offset 0), like "D" matching
///   `$HOME/Documents`;
/// * 100 - a later word starts with `keyword`;
/// * 25 - `smart_case` is off (some keyword has upper case), but the lowercased word starts
///   with this keyword exactly as typed;
/// * 20 - the word starts with the keyword only when both are lowercased;
/// * 0 - no word of this component starts with the keyword.
///
/// A non-zero result gets 5 extra points when `is_last_component` is true.
///
/// Offsets that are out of range or not on a character boundary of `path_component` are
/// skipped instead of causing a panic.
pub fn compute_kw_score(
    path_component: &str,
    keyword: &str,
    left_word_boundaries: &[usize],
    smart_case: bool,
    is_last_component: bool,
) -> u64 {
    let keyword_lower = keyword.to_lowercase();

    // More than one boundary can match; keep the best one.
    let mut best_boundary_score: u64 = 0;
    for &idx in left_word_boundaries {
        let word = match path_component.get(idx..) {
            Some(word) => word,
            None => continue,
        };

        // TODO: think carefully about these rules. Should the case of the match
        // be allowed to influence the score? What if it's all lowercase, so
        // a smart case match is impossible?
        let score = if word.starts_with(keyword) {
            // Exact match, but even better if it's at the leftmost position in the component.
            //
            // TODO: think about checking the right word boundary, and give extra points if it
            // matches. Imagine two directories, src_3 and src. If src_3 is more frequently
            // used, "sr" will match src_3. But "src" will match src.
            if idx == 0 {
                105
            } else {
                100
            }
        } else {
            // Lowercase the suffix itself. Slicing a lowercased copy of the whole component
            // with `idx` is wrong because lowercasing can change the byte length of the text
            // in front of `idx`.
            let word_lower = word.to_lowercase();
            if !smart_case && word_lower.starts_with(keyword) {
                // Smart case is off, but this keyword alone would be a smart case match.
                25
            } else if word_lower.starts_with(keyword_lower.as_str()) {
                // Wrong case, but it's a match otherwise.
                20
            } else {
                // No score for a keyword that matches but not on a word boundary: every path
                // being checked matches in that way, though this component may not.
                0
            }
        };
        best_boundary_score = best_boundary_score.max(score);
    }

    if best_boundary_score > 0 && is_last_component {
        // Matches in the last path component are considered a little better.
        best_boundary_score += 5;
    }

    best_boundary_score
}

/// Returns a `DirDisplay` wrapper that borrows this directory.
pub fn display(&self) -> DirDisplay {
    let dir = self;
    DirDisplay { dir }
}

/// Returns a `DirDisplayScore` wrapper that borrows this directory and carries `now` and an
/// owned copy of `keywords`, if any were given.
// NOTE(review): this span is taken from a diff view; the first signature/body pair is the
// removed version and the second pair is the added one.
pub fn display_score(&self, now: Epoch) -> DirDisplayScore {
DirDisplayScore { dir: self, now }
pub fn display_score(&self, now: Epoch, keywords: Option<&Vec<String>>) -> DirDisplayScore {
DirDisplayScore { dir: self, now, keywords: keywords.map(|vec| vec.iter().cloned().collect()) }
}
}

/// Collects the byte offsets at which a word may start inside `text`.
///
/// Every Unicode word contributes its starting offset, so "hi there" yields 0 and 3.
/// Inside a word, an offset is also recorded wherever the letter case "rises": from an
/// uncased character to a cased one (`my_documents`) or from lower case to upper case
/// (`MyDocuments`).
///
/// Callers must tolerate offsets that do not correspond to a real word start.
fn left_word_boundaries(text: &str) -> Vec<usize> {
    // Variant order matters: the derived `PartialOrd` ranks None < LowerCase < UpperCase.
    #[derive(PartialEq, Clone, Copy, PartialOrd)]
    enum Case {
        None,
        LowerCase,
        UpperCase,
    }

    let classify = |grapheme: &str| -> Case {
        let unchanged_by_lower = grapheme.to_lowercase() == grapheme;
        let unchanged_by_upper = grapheme.to_uppercase() == grapheme;
        match (unchanged_by_lower, unchanged_by_upper) {
            (true, true) => Case::None,
            (true, false) => Case::LowerCase,
            // Everything else is assumed to be upper case, because there might be more than
            // one way to represent upper case.
            (false, _) => Case::UpperCase,
        }
    };

    let mut starts = Vec::new();

    // Only the offsets are kept: matching is done against the whole string so that a keyword
    // can span several words.
    for (word_start, word) in text.unicode_word_indices() {
        starts.push(word_start);

        // Also look for case changes and non-text characters inside the word:
        //   MyDocuments
        //   my_documents
        // TODO: should "clap3b4" count as 4 words or 1?
        let mut previous: Option<Case> = None;
        for (offset, grapheme) in word.grapheme_indices(true) {
            let current = classify(grapheme);
            if let Some(before) = previous {
                // A word starts when going from no case to any case, or from lower case to
                // upper case.
                if current > before {
                    starts.push(word_start + offset);
                }
            }
            previous = Some(current);
        }
    }

    starts
}

pub struct DirDisplay<'a> {
dir: &'a Dir<'a>,
}
Expand All @@ -128,30 +259,35 @@ impl Display for DirDisplay<'_> {
/// Display adapter that prints a directory together with its scores.
pub struct DirDisplayScore<'a> {
// Directory whose path and scores are printed.
dir: &'a Dir<'a>,
// Timestamp handed to `Dir::score` when formatting.
now: Epoch,
// Keywords handed to `Dir::score`; `None` is treated as an empty list when formatting.
keywords: Option<Vec<String>>,
}

// Formats a directory as "<keyword score>,<rank> <path>", with each number right-aligned in
// four columns.
// NOTE(review): this span is taken from a diff view; the single-argument `score` call and the
// first `write!` are the removed lines, the lines following each of them are the added ones.
impl Display for DirDisplayScore<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
let score = self.dir.score(self.now);
// Fall back to an empty keyword list when none was supplied.
let no_keywords = Vec::default();
let keywords = self.keywords.as_ref().unwrap_or(&no_keywords);

let (kw_score, score) = self.dir.score(self.now, keywords);
// Clamp the rank into 0..=9999 so it always fits the four-column field.
let score = if score > 9999.0 {
9999
} else if score > 0.0 {
score as u32
} else {
0
};
write!(f, "{:>4} {}", score, self.dir.path)
// NOTE(review): `kw_score` is not clamped, while the callers in query.rs and remove.rs strip
// a fixed 10-byte prefix with `get(10..)`; a keyword score above 9999 would shift the path.
// Confirm whether that can happen.
write!(f, "{:>4},{:>4} {}", kw_score, score, self.dir.path)
}
}

/// Rank value stored per directory (`Dir::rank`) and scaled by recency in `Dir::score`.
pub type Rank = f64;
/// Result of `Dir::score`: the keyword match score followed by the recency-adjusted rank.
pub type Score = (u64, Rank);
/// Timestamp type. Presumably seconds since the Unix epoch, since `Dir::score` treats
/// `60 * 60` as one hour; confirm against `util::current_time`.
pub type Epoch = u64;

#[cfg(test)]
mod tests {
use std::borrow::Cow;
use std::{borrow::Cow, collections::HashSet};

use super::{Dir, DirList};
use super::{left_word_boundaries, Dir, DirList};

#[test]
fn zero_copy() {
Expand All @@ -164,4 +300,32 @@ mod tests {
assert!(matches!(dir.path, Cow::Borrowed(_)))
}
}

#[test]
fn test_left_word_boundaries() {
    // True when every offset in `expected` is reported for `text`; extra offsets are allowed
    // because `left_word_boundaries` may return extraneous indices.
    let reports_all = |text: &str, expected: &[usize]| -> bool {
        let found: HashSet<usize> = left_word_boundaries(text).into_iter().collect();
        expected.iter().all(|offset| found.contains(offset))
    };

    // Inputs with an exactly known result.
    assert!(left_word_boundaries("").is_empty());
    assert!(left_word_boundaries("Hi") == [0]);

    // Separators: a space and an underscore both start a new word.
    assert!(reports_all("hi there", &[0, 3]));
    assert!(reports_all("hi_there", &[0, 3]));

    // Case changes inside a word, including multi-byte characters before the boundary.
    assert!(left_word_boundaries("FürElise") == [0, 4]);
    assert!(left_word_boundaries("uTorrent") == [0, 1]);
    assert!(left_word_boundaries("µTorrent") == [0, 2]);

    // Whole paths, Unix and Windows style.
    assert!(reports_all("/path/file.ext", &[1, 6, 11]));
    assert!(reports_all(r"C:\path\file.ext", &[0, 3, 8, 13]));
}
}
Loading