Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to build with recent Rust and benchmark point range queries #18

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
target/
Cargo.lock
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,6 @@ tempfile = "3.0.8"

# The release profile, used for `cargo build --release`
[profile.release]
debug = true
opt-level = 3
debug = false
debug-assertions = false
92 changes: 92 additions & 0 deletions examples/basic_points.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
extern crate rucene;

use rucene::core::doc::{DocValuesType, Field, FieldType, Fieldable};
use rucene::core::index::reader::IndexReader;
use rucene::core::index::writer::{IndexWriter, IndexWriterConfig};
use rucene::core::search::collector::TopDocsCollector;
use rucene::core::search::query::LongPoint;
use rucene::core::search::{DefaultIndexSearcher, IndexSearcher};
use rucene::core::store::directory::FSDirectory;

use std::fs;
use std::path::Path;
use std::sync::Arc;

use rucene::error::Result;

/// Returns the `FieldType` used for the indexed `i64` point field.
///
/// Starts from `FieldType::default()` and overrides four settings: the field
/// is not tokenized, carries binary doc values, and is declared with one
/// dimension of 8 bytes.
fn indexed_numeric_field_type() -> FieldType {
    let mut numeric_type = FieldType::default();
    // One dimension of 8 bytes: the size of a single `i64`.
    numeric_type.dimension_num_bytes = 8;
    numeric_type.dimension_count = 1;
    numeric_type.doc_values_type = DocValuesType::Binary;
    numeric_type.tokenized = false;
    numeric_type
}

/// Builds a bytes field named `field_name` whose payload is `data`.
///
/// The value is encoded through `LongPoint::pack` and the field is typed with
/// `indexed_numeric_field_type()`.
fn new_index_numeric_field(field_name: String, data: i64) -> Field {
    let packed = LongPoint::pack(&[data]);
    let point_type = indexed_numeric_field_type();
    Field::new_bytes(field_name, packed, point_type)
}
/// Indexes a single `timestamp` point under `/tmp/test_rucene`, runs a range
/// query that brackets it, and prints the hits.
///
/// # Errors
///
/// Returns any error raised while preparing the directory, adding or
/// committing the document, opening the reader, building the query, running
/// the search, or loading a hit's document.
///
/// # Panics
///
/// Panics if a field returned for a hit has no field data.
fn main() -> Result<()> {
    // Always start from an empty index directory: drop a leftover one, then
    // create it whether or not it existed before.
    let path = "/tmp/test_rucene";
    let dir_path = Path::new(path);
    if dir_path.exists() {
        fs::remove_dir_all(dir_path)?;
    }
    fs::create_dir_all(dir_path)?;

    // create index writer
    let config = Arc::new(IndexWriterConfig::default());
    let directory = Arc::new(FSDirectory::with_path(&dir_path)?);
    let writer = IndexWriter::new(directory, config)?;

    // a document made of one point field
    let timestamp: i64 = 1707782905540;
    let numeric_field = new_index_numeric_field("timestamp".into(), timestamp);
    let doc: Vec<Box<dyn Fieldable>> = vec![Box::new(numeric_field)];
    writer.add_document(doc)?;

    // flush to disk
    writer.commit()?;

    // new index search
    let reader = writer.get_reader(true, false)?;
    let index_searcher = DefaultIndexSearcher::new(Arc::new(reader), None);

    // search a range whose bounds sit one below and one above the indexed value
    let query = LongPoint::new_range_query("timestamp".into(), 1707782905539, 1707782905541)?;

    let mut collector: TopDocsCollector = TopDocsCollector::new(10);
    index_searcher.search(&*query, &mut collector)?;

    let top_docs = collector.top_docs();
    println!("total hits: {}", top_docs.total_hits());

    // The requested field list is identical for every hit, so build it once.
    let stored_fields = vec!["timestamp".into()];
    for d in top_docs.score_docs() {
        let doc_id = d.doc_id();
        println!(" doc: {}", doc_id);
        // fetch stored fields
        let stored_doc = index_searcher.reader().document(doc_id, &stored_fields)?;
        if !stored_doc.fields.is_empty() {
            println!(" stored fields: ");
            for s in &stored_doc.fields {
                println!(
                    " field: {}, value: {}",
                    s.field.name(),
                    s.field.field_data().unwrap()
                );
            }
        }
    }

    Ok(())
}
133 changes: 133 additions & 0 deletions examples/basic_points_range.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
extern crate rucene;

use rucene::core::doc::{DocValuesType, Field, FieldType, Fieldable};
use rucene::core::index::writer::{IndexWriter, IndexWriterConfig};
use rucene::core::search::collector::TopDocsCollector;
use rucene::core::search::query::LongPoint;
use rucene::core::search::{DefaultIndexSearcher, IndexSearcher};
use rucene::core::store::directory::FSDirectory;

use std::cmp;
use std::fs::{self, File};
use std::io::{self, BufRead};
use std::path::Path;
use std::sync::Arc;
use std::time::{Duration, Instant};

use rucene::error::Result;

/// Produces the `FieldType` shared by every `timestamp` point field.
///
/// Built from `FieldType::default()` with tokenization turned off, binary doc
/// values, and a point layout of one dimension that is 8 bytes wide.
fn indexed_numeric_field_type() -> FieldType {
    let mut point_type = FieldType::default();
    // Point layout first: a single 8-byte dimension.
    point_type.dimension_count = 1;
    point_type.dimension_num_bytes = 8;
    point_type.tokenized = false;
    point_type.doc_values_type = DocValuesType::Binary;
    point_type
}

/// Creates the bytes field `field_name` for the value `data`.
///
/// `data` is passed through `LongPoint::pack`; the resulting bytes are handed
/// to `Field::new_bytes` together with `indexed_numeric_field_type()`.
fn new_index_numeric_field(field_name: String, data: i64) -> Field {
    let encoded = LongPoint::pack(&[data]);
    let field_type = indexed_numeric_field_type();
    Field::new_bytes(field_name, encoded, field_type)
}

/// Opens `filename` and returns an iterator over its lines.
///
/// The file is wrapped in an `io::BufReader`; every item the iterator yields
/// is an `io::Result<String>`.
///
/// # Errors
///
/// Returns the `io::Error` from `File::open` when the file cannot be opened.
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
where
    P: AsRef<Path>,
{
    File::open(filename).map(|handle| io::BufReader::new(handle).lines())
}

/// Returns the next line of `lines`, or an `UnexpectedEof` error naming `what`
/// when the input has run out.
fn next_line<I>(lines: &mut I, what: &str) -> io::Result<String>
where
    I: Iterator<Item = io::Result<String>>,
{
    lines.next().unwrap_or_else(|| {
        Err(io::Error::new(
            io::ErrorKind::UnexpectedEof,
            format!("input ended before {}", what),
        ))
    })
}

/// Parses `text` (surrounding whitespace ignored) as a `T`, reporting failures
/// as an `InvalidData` error that names `what`.
fn parse_field<T>(text: &str, what: &str) -> io::Result<T>
where
    T: std::str::FromStr,
    T::Err: std::fmt::Display,
{
    text.trim().parse().map_err(|e| {
        io::Error::new(
            io::ErrorKind::InvalidData,
            format!("invalid {} {:?}: {}", what, text, e),
        )
    })
}

/// Times `timestamp` range queries against the index under `/tmp/test_rucene`.
///
/// Input is read from `../range_datapoints`, laid out as: a document count,
/// that many timestamp lines, a query count, then that many `lower,upper`
/// lines. No documents are added here; the timestamp lines are only validated
/// and skipped.
/// NOTE(review): this presumably relies on an index already present in the
/// directory from an earlier run — confirm.
///
/// Prints the total hit count, the wall-clock time of the whole query loop in
/// seconds, the number of queries, and the mean per-query search time in
/// nanoseconds (0 when there are no queries).
///
/// # Errors
///
/// Returns an error when the directory, writer or reader cannot be opened,
/// when the input file is missing, truncated or malformed, or when building or
/// running a query fails.
fn main() -> Result<()> {
    let path = "/tmp/test_rucene";
    let dir_path = Path::new(path);

    // create index writer
    let config = Arc::new(IndexWriterConfig::default());
    let directory = Arc::new(FSDirectory::with_path(&dir_path)?);
    let writer = IndexWriter::new(directory, config)?;

    // A missing or unreadable input file is reported instead of silently
    // ending the run without any output.
    let mut lines = read_lines("../range_datapoints")?;

    // Skip over the data points, still checking that each one is an i64.
    let num_docs: i32 = parse_field(&next_line(&mut lines, "document count")?, "document count")?;
    for _ in 0..num_docs {
        let _timestamp: i64 = parse_field(&next_line(&mut lines, "timestamp")?, "timestamp")?;
    }

    // Build every query up front so construction errors surface before timing.
    let num_queries: i32 = parse_field(&next_line(&mut lines, "query count")?, "query count")?;
    let mut queries = Vec::new();
    for _ in 0..num_queries {
        let line = next_line(&mut lines, "query range")?;
        let mut bounds = line.split(',');
        // A missing bound becomes an empty string, which fails to parse.
        let lower_bound: i64 = parse_field(bounds.next().unwrap_or(""), "lower bound")?;
        let upper_bound: i64 = parse_field(bounds.next().unwrap_or(""), "upper bound")?;
        queries.push(LongPoint::new_range_query(
            "timestamp".into(),
            lower_bound,
            upper_bound,
        )?);
    }

    let reader = writer.get_reader(true, false)?;
    let index_searcher = DefaultIndexSearcher::new(Arc::new(reader), None);

    let mut hits: u64 = 0;
    // Sum of the individual search durations, in nanoseconds.
    let mut sum: u128 = 0;

    let overall_start = Instant::now();
    for query in &queries {
        let mut collector = TopDocsCollector::new(10);
        let start_time = Instant::now();
        index_searcher.search(&**query, &mut collector)?;
        let time: Duration = start_time.elapsed();
        hits += collector.top_docs().total_hits() as u64;
        sum += time.as_nanos();
    }
    let searching_time = overall_start.elapsed();

    println!("Total hits: {}", hits);
    println!("Searching time: {}", searching_time.as_secs_f64());
    println!("Queries len: {}", queries.len());
    // checked_div avoids a divide-by-zero panic when no queries were supplied.
    println!(
        "Avg. time: {}",
        sum.checked_div(queries.len() as u128).unwrap_or(0)
    );

    Ok(())
}
48 changes: 25 additions & 23 deletions examples/example.rs → examples/basic_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,34 +19,36 @@ use rucene::core::highlight::FieldQuery;
use rucene::core::util::VariantValue;
use rucene::error::Result;

/// Returns the `FieldType` for the indexed text field.
///
/// Starts from `FieldType::default()`, selects
/// `IndexOptions::DocsAndFreqsAndPositionsAndOffsets`, and switches on term
/// vectors together with their positions and offsets.
fn indexed_text_field_type() -> FieldType {
    let mut text_type = FieldType::default();
    text_type.store_term_vectors = true;
    text_type.store_term_vector_positions = true;
    text_type.store_term_vector_offsets = true;
    text_type.index_options = IndexOptions::DocsAndFreqsAndPositionsAndOffsets;
    text_type
}

/// Builds the text field `field_name` from `text`.
///
/// `text` is read through a `StringReader` and split by a
/// `WhitespaceTokenizer`; the field is typed with `indexed_text_field_type()`
/// and is given no value, only the token stream.
fn new_index_text_field(field_name: String, text: String) -> Field {
    let tokenizer = WhitespaceTokenizer::new(Box::new(StringReader::new(text)));
    let text_type = indexed_text_field_type();
    Field::new(field_name, text_type, None, Some(Box::new(tokenizer)))
}
// fn indexed_text_field_type() -> FieldType {
// let mut field_type = FieldType::default();
// field_type.index_options = IndexOptions::DocsAndFreqsAndPositionsAndOffsets;
// field_type.store_term_vectors = true;
// field_type.store_term_vector_offsets = true;
// field_type.store_term_vector_positions = true;
// field_type
// }

// fn new_index_text_field(field_name: String, text: String) -> Field {
// let token_stream = WhitespaceTokenizer::new(Box::new(StringReader::new(text)));
// Field::new(
// field_name,
// indexed_text_field_type(),
// None,
// Some(Box::new(token_stream)),
// )
// }

// Builds the field `field_name` from a default `FieldType` with `stored` set
// and `DocsAndFreqsAndPositionsAndOffsets` index options, carrying `text` as a
// `VariantValue::VString` value.
// NOTE(review): the tokenizer below reads a hard-coded sentence rather than
// `text` — confirm this is intended.
// NOTE(review): five arguments are listed for `Field::new`; this looks like the
// removed `None,` line and its replacement shown together by the diff view —
// confirm which one is current.
fn new_stored_text_field(field_name: String, text: String) -> Field {
let mut field_type = FieldType::default();
field_type.stored = true;

field_type.index_options = IndexOptions::DocsAndFreqsAndPositionsAndOffsets;
Field::new(
field_name,
field_type,
Some(VariantValue::VString(text)),
None,
Some(Box::new(WhitespaceTokenizer::new(Box::new(
StringReader::new("The quick brown fox jumps over a lazy dog".into()),
)))),
)
}

Expand Down Expand Up @@ -89,10 +91,10 @@ fn main() -> Result<()> {
let mut doc: Vec<Box<dyn Fieldable>> = vec![];
// add indexed text field
let text = "The quick brown fox jumps over a lazy dog";
let text_field = new_index_text_field("title".into(), text.into());
doc.push(Box::new(text_field));
// let text_field = new_index_text_field("title".into(), text.into());
// doc.push(Box::new(text_field));
// add raw text field, this used for highlight
let stored_text_field = new_stored_text_field("title.raw".into(), text.into());
let stored_text_field = new_stored_text_field("title".into(), text.into());
doc.push(Box::new(stored_text_field));
// add numeric doc value field
doc.push(Box::new(NumericDocValuesField::new("weight".into(), 1)));
Expand Down
Loading