Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update tantivy to 0.6.1 #28

Merged
merged 7 commits into from
Jul 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
645 changes: 346 additions & 299 deletions Cargo.lock

Large diffs are not rendered by default.

9 changes: 2 additions & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "tantivy-cli"
version = "0.5.1"
version = "0.6.1"
authors = ["Paul Masurel <[email protected]>"]

description = """Command line interface for Tantivy, a search engine library."""
Expand Down Expand Up @@ -30,7 +30,7 @@ byteorder = "0.5"
log = "0.3"
futures = "0.1"
env_logger = "0.3"
tantivy = "0.5.1"
tantivy = "0.6.1"

[[bin]]
name = "tantivy"
Expand All @@ -42,8 +42,3 @@ opt-level = 3
debug = false
debug-assertions = false
lto = true


[features]
default = ["tantivy/simdcompression"]

14 changes: 4 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,8 @@ In this tutorial, we will create a brand new index with the articles of English

There are a couple ways to install `tantivy-cli`.

If you are a Rust programmer, you probably have `cargo` and `rustup` installed and you can just
run `rustup run nightly cargo install tantivy-cli`. (`cargo install tantivy-cli` will work
as well if nightly is your default toolchain).

Alternatively, you can directly download a
static binary for [Linux x86 64](https://github.com/tantivy-search/tantivy-cli/releases/download/0.4.2/tantivy-cli-0.4.2-x86_64-unknown-linux-musl.tar.gz) or for [Mac OS X](https://github.com/tantivy-search/tantivy-cli/releases/download/0.4.2/tantivy-cli-0.4.2-x86_64-apple-darwin.tar.gz)
and save it in a directory on your system's `PATH`.

If you are a Rust programmer, you probably have `cargo` installed and you can just
run `cargo install tantivy-cli`



Expand Down Expand Up @@ -217,11 +211,11 @@ the following [url](http://localhost:3000/api/?q=barack+obama&nhits=20) in your
By default this query is treated as `barack OR obama`.
You can also search for documents that contain both terms by adding a `+` sign before each term in your query.

http://localhost:3000/api/?q=%2Bbarack%20%2Bobama%0A&nhits=20
http://localhost:3000/api/?q=%2Bbarack%20%2Bobama&nhits=20

Also, `-` makes it possible to exclude documents containing a specific term.

http://localhost:3000/api/?q=-barack%20%2Bobama%0A&nhits=20
http://localhost:3000/api/?q=-barack%20%2Bobama&nhits=20

Finally, tantivy handles phrase queries.

Expand Down
9 changes: 5 additions & 4 deletions src/commands/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use tantivy::Index;
use tantivy::schema::{Field, Schema};
use tantivy::query::QueryParser;
use std::path::Path;
use tantivy::TimerTree;
use std::io::BufReader;
use std::io::BufRead;
use std::io;
Expand All @@ -12,6 +11,7 @@ use tantivy::collector::TopCollector;
use tantivy::collector::CountCollector;
use clap::ArgMatches;
use std::path::PathBuf;
use timer::TimerTree;


pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> {
Expand Down Expand Up @@ -52,7 +52,7 @@ fn run_bench(index_path: &Path,
println!("Query : {:?}", index_path);
println!("-------------------------------\n\n\n");

let index = Index::open(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
let index = Index::open_in_dir(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
let searcher = index.searcher();
let default_search_fields: Vec<Field> = extract_search_fields(&index.schema());
let queries = read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e))?;
Expand All @@ -66,10 +66,11 @@ fn run_bench(index_path: &Path,
// let num_terms = query.num_terms();
let mut top_collector = TopCollector::with_limit(10);
let mut count_collector = CountCollector::default();
let timing;
let mut timing = TimerTree::default();
{
let _search = timing.open("search");
let mut collector = chain().push(&mut top_collector).push(&mut count_collector);
timing = query.search(&searcher, &mut collector)
query.search(&searcher, &mut collector)
.map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e))?;
}
println!("{}\t{}\t{}", query_txt, count_collector.count(), timing.total_time());
Expand Down
2 changes: 1 addition & 1 deletion src/commands/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ fn run_index(directory: PathBuf,
num_threads: usize,
no_merge: bool) -> tantivy::Result<()> {

let index = Index::open(&directory)?;
let index = Index::open_in_dir(&directory)?;
let schema = index.schema();
let (line_sender, line_receiver) = chan::sync(10_000);
let (doc_sender, doc_receiver) = chan::sync(10_000);
Expand Down
6 changes: 3 additions & 3 deletions src/commands/merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@ pub fn run_merge_cli(argmatch: &ArgMatches) -> Result<(), String> {


fn run_merge(path: PathBuf) -> tantivy::Result<()> {
let index = Index::open(&path)?;
let index = Index::open_in_dir(&path)?;
let segments = index.searchable_segment_ids()?;
let segment_meta: SegmentMeta = index
.writer(HEAP_SIZE)?
.merge(&segments)
.merge(&segments)?
.wait()
.expect("Merge failed");
//.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled")));
println!("Merge finished with segment meta {:?}", segment_meta);
println!("Garbage collect irrelevant segments.");
Index::open(&path)?
Index::open_in_dir(&path)?
.writer_with_num_threads(1, 40_000_000)?
.garbage_collect_files()?;
Ok(())
Expand Down
4 changes: 2 additions & 2 deletions src/commands/new.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate:
io::stdout().flush().unwrap();
let mut buffer = String::new();
io::stdin().read_line(&mut buffer).ok().expect("Failed to read line");
let answer = buffer.trim_right_matches("\n").to_string();
let answer = buffer.trim_right().to_string();
match predicate(&answer) {
Ok(()) => {
return answer;
Expand Down Expand Up @@ -145,7 +145,7 @@ fn run_new(directory: PathBuf) -> tantivy::Result<()> {
let schema = schema_builder.build();
let schema_json = format!("{}", serde_json::to_string_pretty(&schema).unwrap());
println!("\n{}\n", Style::new().fg(Green).paint(schema_json));
Index::create(&directory, schema)?;
Index::create_in_dir(&directory, schema)?;
Ok(())
}

2 changes: 1 addition & 1 deletion src/commands/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> {
}

fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {
let index = Index::open(directory)?;
let index = Index::open_in_dir(directory)?;
let schema = index.schema();
let default_fields: Vec<Field> = schema
.fields()
Expand Down
4 changes: 2 additions & 2 deletions src/commands/serve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ use tantivy::schema::Field;
use tantivy::schema::FieldType;
use tantivy::schema::NamedFieldDocument;
use tantivy::schema::Schema;
use tantivy::TimerTree;
use tantivy::tokenizer::*;
use tantivy::DocAddress;
use timer::TimerTree;
use urlencoded::UrlEncodedQuery;

pub fn run_serve_cli(matches: &ArgMatches) -> Result<(), String> {
Expand Down Expand Up @@ -76,7 +76,7 @@ struct IndexServer {
impl IndexServer {

fn load(path: &Path) -> IndexServer {
let index = Index::open(path).unwrap();
let index = Index::open_in_dir(path).unwrap();
index.tokenizers()
.register("commoncrawl", SimpleTokenizer
.filter(RemoveLongFilter::limit(40))
Expand Down
1 change: 1 addition & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ extern crate serde_derive;

use clap::{AppSettings, Arg, App, SubCommand};
mod commands;
pub mod timer;
use self::commands::*;


Expand Down
99 changes: 99 additions & 0 deletions src/timer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
use time::PreciseTime;

/// A running, named timer attached to a `TimerTree`.
///
/// Values are produced by `TimerTree::open` (depth 0) or by
/// `OpenTimer::open` on a parent timer (parent depth + 1). When the value
/// is dropped, the elapsed time is appended to the tree as a `Timing`.
pub struct OpenTimer<'a> {
// Label recorded alongside the measured duration.
name: &'static str,
// Tree that receives the `Timing` entry when this timer is dropped.
timer_tree: &'a mut TimerTree,
// Instant captured when the timer was opened.
start: PreciseTime,
// Nesting level: 0 for a timer opened on the tree itself.
depth: u32,
}

impl<'a> OpenTimer<'a> {
    /// Begins timing a named subtask nested under this timer.
    ///
    /// The returned `OpenTimer` sits one level deeper than `self` and
    /// records its elapsed time into the same tree as soon as it is
    /// dropped.
    pub fn open(&mut self, name: &'static str) -> OpenTimer {
        // A child is always exactly one level below its parent.
        let child_depth = self.depth + 1;
        OpenTimer {
            name,
            timer_tree: self.timer_tree,
            start: PreciseTime::now(),
            depth: child_depth,
        }
    }
}

impl<'a> Drop for OpenTimer<'a> {
    /// Stops the timer and appends its measurement to the owning tree.
    fn drop(&mut self) {
        // Elapsed time between opening the timer and this drop.
        let elapsed = self.start.to(PreciseTime::now());
        // `num_microseconds` yields `None` only on overflow.
        let duration = elapsed.num_microseconds().unwrap();
        let record = Timing {
            name: self.name,
            duration,
            depth: self.depth,
        };
        self.timer_tree.timings.push(record);
    }
}

/// A single completed timing measurement.
///
/// One entry is appended to a `TimerTree` each time an `OpenTimer` is
/// dropped.
#[derive(Debug, Serialize)]
pub struct Timing {
// Label given to the timer when it was opened.
name: &'static str,
// Elapsed time in microseconds.
duration: i64,
// Nesting level of the timer: 0 for a timer opened directly on the tree.
depth: u32,
}

/// Collection of timings gathered from nested, named timers.
///
/// Start a timer with `open`; nested timers are started from the returned
/// `OpenTimer`.
#[derive(Debug, Serialize)]
pub struct TimerTree {
// Completed timings, in the order their timers were dropped
// (so a nested timer appears before the timer that encloses it).
timings: Vec<Timing>,
}

impl TimerTree {
    /// Returns the duration, in microseconds, of the most recently
    /// completed timer.
    ///
    /// Timings are recorded when a timer is dropped, so once a top-level
    /// timer and all of its nested timers have been closed this is the
    /// elapsed time of that top-level timer. It is not a sum over all
    /// recorded timings.
    ///
    /// Returns `0` when no timer has completed yet, rather than panicking
    /// on an empty tree.
    pub fn total_time(&self) -> i64 {
        self.timings.last().map_or(0, |timing| timing.duration)
    }

    /// Opens a new named top-level task (depth 0).
    ///
    /// The timer stops, and its timing is recorded into this tree, when
    /// the returned `OpenTimer` is dropped.
    pub fn open(&mut self, name: &'static str) -> OpenTimer {
        OpenTimer {
            name,
            timer_tree: self,
            start: PreciseTime::now(),
            depth: 0,
        }
    }
}

impl Default for TimerTree {
    /// Builds a timer tree that holds no recorded timings.
    fn default() -> Self {
        let timings = Vec::new();
        TimerTree { timings }
    }
}

#[cfg(test)]
mod tests {

    use super::*;

    /// Opens a root timer, one child and two grandchildren, then checks
    /// that each of the four timers recorded exactly one timing on drop.
    #[test]
    fn test_timer() {
        let mut tree = TimerTree::default();
        {
            // Each nested scope closes its timers before the parent's.
            let mut root = tree.open("a");
            {
                let mut child = root.open("b");
                {
                    let _first_leaf = child.open("c");
                }
                {
                    let _second_leaf = child.open("d");
                }
            }
        }
        let recorded = tree.timings.len();
        assert_eq!(recorded, 4);
    }
}