From 2d446722d89252f0ed2384e4d43e1b296226d02d Mon Sep 17 00:00:00 2001 From: Ramon Navarro Bosch Date: Mon, 25 May 2020 19:56:52 +0200 Subject: [PATCH 1/8] Adding facet filter and search --- src/index.rs | 38 +++++++++++++++++- src/searcher.rs | 93 ++++++++++++++++++++++++++++++++++++++++--- tests/tantivy_test.py | 33 +++++++++++++-- 3 files changed, 155 insertions(+), 9 deletions(-) diff --git a/src/index.rs b/src/index.rs index 89347532..4cca6846 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,6 +1,8 @@ #![allow(clippy::new_ret_no_self)] -use pyo3::{exceptions, prelude::*, types::PyAny}; +use pyo3::exceptions; +use pyo3::prelude::*; +use pyo3::types::{PyAny, PyDict, PyTuple, PyList}; use crate::{ document::{extract_value, Document}, @@ -308,6 +310,7 @@ impl Index { &self, query: &str, default_field_names: Option>, + filters: Option<&PyDict> ) -> PyResult { let mut default_fields = vec![]; let schema = self.index.schema(); @@ -342,6 +345,39 @@ impl Index { tv::query::QueryParser::for_index(&self.index, default_fields); let query = parser.parse_query(query).map_err(to_pyerr)?; + if let Some(filters_dict) = filters { + let mut query_vec = Vec::new(); + query_vec.push((tv::query::Occur::Must, query)); + for key_value_any in filters_dict.items() { + if let Ok(key_value) = key_value_any.downcast::() { + if key_value.len() != 2 { + continue; + } + let key: String = key_value.get_item(0).extract()?; + let field = schema.get_field(&key).ok_or_else(|| { + exceptions::ValueError::py_err(format!( + "Field `{}` is not defined in the schema.", + key + )) + })?; + + if let Ok(value_list) = key_value.get_item(1).downcast::() { + for value_element in value_list { + if let Ok(s) = value_element.extract::() { + let facet = tv::schema::Facet::from_text(&s); + let term = tv::schema::Term::from_facet(field, &facet); + let term_query = tv::query::TermQuery::new(term, tv::schema::IndexRecordOption::Basic); + let query: Box = Box::new(term_query); + query_vec.push((tv::query::Occur::Must, query)); + } + } + } + } + } + let boolean_query = tv::query::BooleanQuery::from(query_vec); + return Ok(Query { inner: Box::new(boolean_query) }) + } + Ok(Query { inner: query }) } } diff --git a/src/searcher.rs b/src/searcher.rs index 2f0cc1bf..8ade498b 100644 --- a/src/searcher.rs +++ b/src/searcher.rs @@ -1,7 +1,9 @@ #![allow(clippy::new_ret_no_self)] use crate::{document::Document, get_field, query::Query, to_pyerr}; +use pyo3::types::{PyDict, PyTuple, PyList}; use pyo3::{exceptions::PyValueError, prelude::*, PyObjectProtocol}; +use std::collections::BTreeMap; use tantivy as tv; use tantivy::collector::{Count, MultiCollector, TopDocs}; @@ -41,10 +43,11 @@ impl ToPyObject for Fruit { /// Object holding a results successful search. pub(crate) struct SearchResult { hits: Vec<(Fruit, DocAddress)>, + facets_result: BTreeMap>, #[pyo3(get)] /// How many documents matched the query. Only available if `count` was set /// to true during the search. - count: Option, + count: Option } #[pyproto] @@ -52,11 +55,11 @@ impl PyObjectProtocol for SearchResult { fn __repr__(&self) -> PyResult { if let Some(count) = self.count { Ok(format!( - "SearchResult(hits: {:?}, count: {})", - self.hits, count + "SearchResult(hits: {:?}, count: {}, facets: {})", + self.hits, count, self.facets_result.len() )) } else { - Ok(format!("SearchResult(hits: {:?})", self.hits)) + Ok(format!("SearchResult(hits: {:?}, facets: {})", self.hits, self.facets_result.len())) } } } @@ -74,6 +77,12 @@ impl SearchResult { .collect(); Ok(ret) } + + #[getter] + fn facets(&self, _py: Python) -> PyResult>> { + Ok(self.facets_result.clone()) + } + } #[pymethods] @@ -90,6 +99,8 @@ impl Searcher { /// should be ordered by. The field must be declared as a fast field /// when building the schema. Note, this only works for unsigned /// fields. + /// facets (PyDict, optional): A dictionary of facet fields and keys to + /// filter. /// offset (Field, optional): The offset from which the results have /// to be returned. /// @@ -105,6 +116,7 @@ impl Searcher { count: bool, order_by_field: Option<&str>, offset: usize, + facets: Option<&PyDict> ) -> PyResult { let mut multicollector = MultiCollector::new(); @@ -114,6 +126,35 @@ impl Searcher { None }; + let mut facets_requests = BTreeMap::new(); + + // We create facets collector for each field and terms defined on the facets args + if let Some(facets_dict) = facets { + + for key_value_any in facets_dict.items() { + if let Ok(key_value) = key_value_any.downcast::() { + if key_value.len() != 2 { + continue; + } + let key: String = key_value.get_item(0).extract()?; + let field = get_field(&self.inner.index().schema(), &key)?; + + let mut facet_collector = tv::collector::FacetCollector::for_field(field); + + if let Ok(value_list) = key_value.get_item(1).downcast::() { + for value_element in value_list { + if let Ok(s) = value_element.extract::() { + facet_collector.add_facet(&s); + } + + } + let facet_handler = multicollector.add_collector(facet_collector); + facets_requests.insert(key, facet_handler); + } + } + } + } + let (mut multifruit, hits) = { if let Some(order_by) = order_by_field { let field = get_field(&self.inner.index().schema(), order_by)?; @@ -162,7 +203,38 @@ impl Searcher { None => None, }; - Ok(SearchResult { hits, count }) + let mut facets_result: BTreeMap> = + BTreeMap::new(); + + // Go though all collectors that are registered + for (key, facet_collector) in facets_requests { + let facet_count = facet_collector.extract(&mut multifruit); + let mut facet_vec = Vec::new(); + if let Some(facets_dict) = facets { + match facets_dict.get_item(key.clone()) { + Some(facets_list_by_key) => { + if let Ok(facets_list_by_key_native) = facets_list_by_key.downcast::() { + for facet_value in facets_list_by_key_native { + if let Ok(s) = facet_value.extract::() { + let facet_value_vec: Vec<(&tv::schema::Facet, u64)> = facet_count + .get(&s) + .collect(); + + // Go for all elements on facet and count to add on vector + for (facet_value_vec_element, facet_count) in facet_value_vec { + facet_vec.push((facet_value_vec_element.to_string(), facet_count)) + } + } + } + } + } + None => println!("Not found.") + } + } + facets_result.insert(key.clone(), facet_vec); + } + + Ok(SearchResult { hits, count, facets_result }) } /// Returns the overall number of documents in the index. @@ -171,6 +243,17 @@ impl Searcher { self.inner.num_docs() } + fn docn(&self, seg_doc: &PyTuple) -> PyResult { + let seg : u32 = seg_doc.get_item(0).extract()?; + let doc : u32 = seg_doc.get_item(1).extract()?; + let address = tv::DocAddress(seg, doc); + let doc = self.inner.doc(address).map_err(to_pyerr)?; + let named_doc = self.inner.schema().to_named_doc(&doc); + Ok(Document { + field_values: named_doc.0, + }) + } + /// Fetches a document from Tantivy's store given a DocAddress. /// /// Args: diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index 8c3b6368..ff2a1b06 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -5,7 +5,7 @@ def schema(): - return SchemaBuilder().add_text_field("title", stored=True).add_text_field("body").build() + return SchemaBuilder().add_text_field("title", stored=True).add_text_field("body").add_facet_field("facet").build() def create_index(dir=None): # assume all tests will use the same documents for now @@ -27,6 +27,7 @@ def create_index(dir=None): "now without taking a fish." ), ) + doc.add_facet('facet', tantivy.Facet.from_string("/mytag")) writer.add_document(doc) # 2 use the built-in json support # keys need to coincide with field names @@ -117,14 +118,40 @@ def test_and_query_parser_default_fields(self, ram_index): assert repr(query) == """Query(TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114])))""" def test_and_query_parser_default_fields_undefined(self, ram_index): - query = ram_index.parse_query("winter") + query = ram_index.parse_query("/winter") assert ( repr(query) == "Query(BooleanQuery { subqueries: [" "(Should, TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114]))), " - "(Should, TermQuery(Term(field=1,bytes=[119, 105, 110, 116, 101, 114])))] " + "(Should, TermQuery(Term(field=1,bytes=[119, 105, 110, 116, 101, 114]))), " + "(Should, TermQuery(Term(field=2,bytes=[119, 105, 110, 116, 101, 114])))] " "})" ) + def test_and_query_parser_default_fields_facets(self, ram_index): + index = ram_index + query = index.parse_query("old", default_field_names=["title", "body"], filters={"facet": ["/mytag"]}) + # look for an intersection of documents + searcher = index.searcher() + result = searcher.search(query, 10) + assert result.count == 1 + + query = index.parse_query("old", default_field_names=["title", "body"], filters={"facet": ["/wrongtag"]}) + # look for an intersection of documents + searcher = index.searcher() + result = searcher.search(query, 10) + assert result.count == 0 + + def test_search_facets(self, ram_index): + index = ram_index + query = index.parse_query("old", default_field_names=["title", "body"]) + # look for an intersection of documents + searcher = index.searcher() + result = searcher.search(query, 10, facets={"facet": ["/"]}) + assert result.count == 1 + assert ('/mytag', 1) in result.facets['facet'] + + + def test_query_errors(self, ram_index): index = ram_index # no "bod" field From 674727d1a863be52f669d0a110bcd8b518faeb8d Mon Sep 17 00:00:00 2001 From: Ramon Navarro Bosch Date: Mon, 25 May 2020 23:31:38 +0200 Subject: [PATCH 2/8] linter --- src/index.rs | 23 +++++++++++----- src/searcher.rs | 73 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 67 insertions(+), 29 deletions(-) diff --git a/src/index.rs b/src/index.rs index 4cca6846..8d836706 100644 --- a/src/index.rs +++ b/src/index.rs @@ -2,7 +2,7 @@ use pyo3::exceptions; use pyo3::prelude::*; -use pyo3::types::{PyAny, PyDict, PyTuple, PyList}; +use pyo3::types::{PyAny, PyDict, PyList, PyTuple}; use crate::{ document::{extract_value, Document}, @@ -310,7 +310,7 @@ impl Index { &self, query: &str, default_field_names: Option>, - filters: Option<&PyDict> + filters: Option<&PyDict>, ) -> PyResult { let mut default_fields = vec![]; let schema = self.index.schema(); @@ -361,13 +361,20 @@ impl Index { )) })?; - if let Ok(value_list) = key_value.get_item(1).downcast::() { + if let Ok(value_list) = + key_value.get_item(1).downcast::() + { for value_element in value_list { if let Ok(s) = value_element.extract::() { let facet = tv::schema::Facet::from_text(&s); - let term = tv::schema::Term::from_facet(field, &facet); - let term_query = tv::query::TermQuery::new(term, tv::schema::IndexRecordOption::Basic); - let query: Box = Box::new(term_query); + let term = + tv::schema::Term::from_facet(field, &facet); + let term_query = tv::query::TermQuery::new( + term, + tv::schema::IndexRecordOption::Basic, + ); + let query: Box = + Box::new(term_query); query_vec.push((tv::query::Occur::Must, query)); } } @@ -375,7 +382,9 @@ impl Index { } } let boolean_query = tv::query::BooleanQuery::from(query_vec); - return Ok(Query { inner: Box::new(boolean_query) }) + return Ok(Query { + inner: Box::new(boolean_query), + }); } Ok(Query { inner: query }) diff --git a/src/searcher.rs b/src/searcher.rs index 8ade498b..83aa375d 100644 --- a/src/searcher.rs +++ b/src/searcher.rs @@ -1,8 +1,12 @@ #![allow(clippy::new_ret_no_self)] -use crate::{document::Document, get_field, query::Query, to_pyerr}; -use pyo3::types::{PyDict, PyTuple, PyList}; -use pyo3::{exceptions::PyValueError, prelude::*, PyObjectProtocol}; +use crate::document::Document; +use crate::query::Query; +use crate::{get_field, to_pyerr}; +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3::types::{PyDict, PyList, PyTuple}; +use pyo3::PyObjectProtocol; use std::collections::BTreeMap; use tantivy as tv; use tantivy::collector::{Count, MultiCollector, TopDocs}; @@ -56,10 +60,16 @@ impl PyObjectProtocol for SearchResult { if let Some(count) = self.count { Ok(format!( "SearchResult(hits: {:?}, count: {}, facets: {})", - self.hits, count, self.facets_result.len() + self.hits, + count, + self.facets_result.len() )) } else { - Ok(format!("SearchResult(hits: {:?}, facets: {})", self.hits, self.facets_result.len())) + Ok(format!( + "SearchResult(hits: {:?}, facets: {})", + self.hits, + self.facets_result.len() + )) } } } @@ -79,10 +89,12 @@ impl SearchResult { } #[getter] - fn facets(&self, _py: Python) -> PyResult>> { + fn facets( + &self, + _py: Python, + ) -> PyResult>> { Ok(self.facets_result.clone()) } - } #[pymethods] @@ -116,7 +128,7 @@ impl Searcher { count: bool, order_by_field: Option<&str>, offset: usize, - facets: Option<&PyDict> + facets: Option<&PyDict>, ) -> PyResult { let mut multicollector = MultiCollector::new(); @@ -130,7 +142,6 @@ impl Searcher { // We create facets collector for each field and terms defined on the facets args if let Some(facets_dict) = facets { - for key_value_any in facets_dict.items() { if let Ok(key_value) = key_value_any.downcast::() { if key_value.len() != 2 { @@ -139,16 +150,20 @@ impl Searcher { let key: String = key_value.get_item(0).extract()?; let field = get_field(&self.inner.index().schema(), &key)?; - let mut facet_collector = tv::collector::FacetCollector::for_field(field); + let mut facet_collector = + tv::collector::FacetCollector::for_field(field); - if let Ok(value_list) = key_value.get_item(1).downcast::() { + if let Ok(value_list) = + key_value.get_item(1).downcast::() + { for value_element in value_list { if let Ok(s) = value_element.extract::() { facet_collector.add_facet(&s); } } - let facet_handler = multicollector.add_collector(facet_collector); + let facet_handler = + multicollector.add_collector(facet_collector); facets_requests.insert(key, facet_handler); } } @@ -204,7 +219,7 @@ impl Searcher { }; let mut facets_result: BTreeMap> = - BTreeMap::new(); + BTreeMap::new(); // Go though all collectors that are registered for (key, facet_collector) in facets_requests { @@ -213,16 +228,26 @@ impl Searcher { if let Some(facets_dict) = facets { match facets_dict.get_item(key.clone()) { Some(facets_list_by_key) => { - if let Ok(facets_list_by_key_native) = facets_list_by_key.downcast::() { + if let Ok(facets_list_by_key_native) = + facets_list_by_key.downcast::() + { for facet_value in facets_list_by_key_native { if let Ok(s) = facet_value.extract::() { - let facet_value_vec: Vec<(&tv::schema::Facet, u64)> = facet_count - .get(&s) - .collect(); + let facet_value_vec: Vec<( + &tv::schema::Facet, + u64, + )> = facet_count.get(&s).collect(); // Go for all elements on facet and count to add on vector - for (facet_value_vec_element, facet_count) in facet_value_vec { - facet_vec.push((facet_value_vec_element.to_string(), facet_count)) + for ( + facet_value_vec_element, + facet_count, + ) in facet_value_vec + { + facet_vec.push(( + facet_value_vec_element.to_string(), + facet_count, + )) } } } @@ -234,7 +259,11 @@ impl Searcher { facets_result.insert(key.clone(), facet_vec); } - Ok(SearchResult { hits, count, facets_result }) + Ok(SearchResult { + hits, + count, + facets_result, + }) } /// Returns the overall number of documents in the index. @@ -244,8 +273,8 @@ impl Searcher { } fn docn(&self, seg_doc: &PyTuple) -> PyResult { - let seg : u32 = seg_doc.get_item(0).extract()?; - let doc : u32 = seg_doc.get_item(1).extract()?; + let seg: u32 = seg_doc.get_item(0).extract()?; + let doc: u32 = seg_doc.get_item(1).extract()?; let address = tv::DocAddress(seg, doc); let doc = self.inner.doc(address).map_err(to_pyerr)?; let named_doc = self.inner.schema().to_named_doc(&doc); From 1d4aebacdfd23b6899a78adf9409010fcbb40fcd Mon Sep 17 00:00:00 2001 From: Ramon Navarro Bosch Date: Tue, 26 May 2020 08:44:53 +0200 Subject: [PATCH 3/8] Simplify parse query facets format --- src/index.rs | 46 +------------------------------------------ tests/tantivy_test.py | 4 ++-- 2 files changed, 3 insertions(+), 47 deletions(-) diff --git a/src/index.rs b/src/index.rs index 8d836706..3f8cdac5 100644 --- a/src/index.rs +++ b/src/index.rs @@ -2,7 +2,7 @@ use pyo3::exceptions; use pyo3::prelude::*; -use pyo3::types::{PyAny, PyDict, PyList, PyTuple}; +use pyo3::types::PyAny; use crate::{ document::{extract_value, Document}, @@ -310,7 +310,6 @@ impl Index { &self, query: &str, default_field_names: Option>, - filters: Option<&PyDict>, ) -> PyResult { let mut default_fields = vec![]; let schema = self.index.schema(); @@ -344,49 +343,6 @@ impl Index { let parser = tv::query::QueryParser::for_index(&self.index, default_fields); let query = parser.parse_query(query).map_err(to_pyerr)?; - - if let Some(filters_dict) = filters { - let mut query_vec = Vec::new(); - query_vec.push((tv::query::Occur::Must, query)); - for key_value_any in filters_dict.items() { - if let Ok(key_value) = key_value_any.downcast::() { - if key_value.len() != 2 { - continue; - } - let key: String = key_value.get_item(0).extract()?; - let field = schema.get_field(&key).ok_or_else(|| { - exceptions::ValueError::py_err(format!( - "Field `{}` is not defined in the schema.", - key - )) - })?; - - if let Ok(value_list) = - key_value.get_item(1).downcast::() - { - for value_element in value_list { - if let Ok(s) = value_element.extract::() { - let facet = tv::schema::Facet::from_text(&s); - let term = - tv::schema::Term::from_facet(field, &facet); - let term_query = tv::query::TermQuery::new( - term, - tv::schema::IndexRecordOption::Basic, - ); - let query: Box = - Box::new(term_query); - query_vec.push((tv::query::Occur::Must, query)); - } - } - } - } - } - let boolean_query = tv::query::BooleanQuery::from(query_vec); - return Ok(Query { - inner: Box::new(boolean_query), - }); - } - Ok(Query { inner: query }) } } diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index ff2a1b06..e6b05bc4 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -129,13 +129,13 @@ def test_and_query_parser_default_fields_undefined(self, ram_index): def test_and_query_parser_default_fields_facets(self, ram_index): index = ram_index - query = index.parse_query("old", default_field_names=["title", "body"], filters={"facet": ["/mytag"]}) + query = index.parse_query("old +facet:/mytag", default_field_names=["title", "body"]) # look for an intersection of documents searcher = index.searcher() result = searcher.search(query, 10) assert result.count == 1 - query = index.parse_query("old", default_field_names=["title", "body"], filters={"facet": ["/wrongtag"]}) + query = index.parse_query("old +facet:/wrong", default_field_names=["title", "body"]) # look for an intersection of documents searcher = index.searcher() result = searcher.search(query, 10) From 69b68efda3f5a94c8a65c5a47e588395c6b5ddae Mon Sep 17 00:00:00 2001 From: Ramon Navarro Bosch Date: Wed, 24 Mar 2021 11:22:11 +0100 Subject: [PATCH 4/8] Fixing syntax on code to fix PR21 --- .gitignore | 1 + src/searcher.rs | 13 ++---------- tests/tantivy_test.py | 46 +++++++++++++++++++++++++++++-------------- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 72ff37d7..8205e70a 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ __pycache__/ tantivy.so tantivy/tantivy.cpython*.so tantivy.egg-info/ +.python-version \ No newline at end of file diff --git a/src/searcher.rs b/src/searcher.rs index 83aa375d..a3dfe6f7 100644 --- a/src/searcher.rs +++ b/src/searcher.rs @@ -89,6 +89,8 @@ impl SearchResult { } #[getter] + /// The list of facets that are requested on the search based on the + /// search results. fn facets( &self, _py: Python, @@ -272,17 +274,6 @@ impl Searcher { self.inner.num_docs() } - fn docn(&self, seg_doc: &PyTuple) -> PyResult { - let seg: u32 = seg_doc.get_item(0).extract()?; - let doc: u32 = seg_doc.get_item(1).extract()?; - let address = tv::DocAddress(seg, doc); - let doc = self.inner.doc(address).map_err(to_pyerr)?; - let named_doc = self.inner.schema().to_named_doc(&doc); - Ok(Document { - field_values: named_doc.0, - }) - } - /// Fetches a document from Tantivy's store given a DocAddress. /// /// Args: diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index e6b05bc4..dad39c67 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -5,7 +5,14 @@ def schema(): - return SchemaBuilder().add_text_field("title", stored=True).add_text_field("body").add_facet_field("facet").build() + return ( + SchemaBuilder() + .add_text_field("title", stored=True) + .add_text_field("body") + .add_facet_field("facet") + .build() + ) + def create_index(dir=None): # assume all tests will use the same documents for now @@ -27,7 +34,7 @@ def create_index(dir=None): "now without taking a fish." ), ) - doc.add_facet('facet', tantivy.Facet.from_string("/mytag")) + doc.add_facet("facet", tantivy.Facet.from_string("/mytag")) writer.add_document(doc) # 2 use the built-in json support # keys need to coincide with field names @@ -100,7 +107,9 @@ def test_simple_search_in_ram(self, ram_index): def test_and_query(self, ram_index): index = ram_index - query = index.parse_query("title:men AND body:summer", default_field_names=["title", "body"]) + query = index.parse_query( + "title:men AND body:summer", default_field_names=["title", "body"] + ) # look for an intersection of documents searcher = index.searcher() result = searcher.search(query, 10) @@ -115,7 +124,10 @@ def test_and_query(self, ram_index): def test_and_query_parser_default_fields(self, ram_index): query = ram_index.parse_query("winter", default_field_names=["title"]) - assert repr(query) == """Query(TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114])))""" + assert ( + repr(query) + == """Query(TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114])))""" + ) def test_and_query_parser_default_fields_undefined(self, ram_index): query = ram_index.parse_query("/winter") @@ -129,13 +141,17 @@ def test_and_query_parser_default_fields_undefined(self, ram_index): def test_and_query_parser_default_fields_facets(self, ram_index): index = ram_index - query = index.parse_query("old +facet:/mytag", default_field_names=["title", "body"]) + query = index.parse_query( + "old +facet:/mytag", default_field_names=["title", "body"] + ) # look for an intersection of documents searcher = index.searcher() result = searcher.search(query, 10) assert result.count == 1 - query = index.parse_query("old +facet:/wrong", default_field_names=["title", "body"]) + query = index.parse_query( + "old +facet:/wrong", default_field_names=["title", "body"] + ) # look for an intersection of documents searcher = index.searcher() result = searcher.search(query, 10) @@ -148,9 +164,7 @@ def test_search_facets(self, ram_index): searcher = index.searcher() result = searcher.search(query, 10, facets={"facet": ["/"]}) assert result.count == 1 - assert ('/mytag', 1) in result.facets['facet'] - - + assert ("/mytag", 1) in result.facets["facet"] def test_query_errors(self, ram_index): index = ram_index @@ -159,9 +173,11 @@ def test_query_errors(self, ram_index): index.parse_query("bod:men", ["title", "body"]) def test_order_by_search(self): - schema = (SchemaBuilder() + schema = ( + SchemaBuilder() .add_unsigned_field("order", fast="single") - .add_text_field("title", stored=True).build() + .add_text_field("title", stored=True) + .build() ) index = Index(schema) @@ -182,7 +198,6 @@ def test_order_by_search(self): doc.add_unsigned("order", 1) doc.add_text("title", "Another test title") - writer.add_document(doc) writer.commit() @@ -190,7 +205,6 @@ def test_order_by_search(self): query = index.parse_query("test") - searcher = index.searcher() result = searcher.search(query, 10, offset=2, order_by_field="order") @@ -214,9 +228,11 @@ def test_order_by_search(self): assert searched_doc["title"] == ["Test title"] def test_order_by_search_without_fast_field(self): - schema = (SchemaBuilder() + schema = ( + SchemaBuilder() .add_unsigned_field("order") - .add_text_field("title", stored=True).build() + .add_text_field("title", stored=True) + .build() ) index = Index(schema) From f00554b495ab6cc756437f436e5c53ec893fb5ec Mon Sep 17 00:00:00 2001 From: Ramon Navarro Bosch Date: Wed, 24 Mar 2021 12:25:51 +0100 Subject: [PATCH 5/8] Upgrading to tantivy 0.14 --- src/index.rs | 2 +- src/schemabuilder.rs | 25 +++++++++++++++++++++++-- src/searcher.rs | 15 +++++---------- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/src/index.rs b/src/index.rs index 3f8cdac5..38460d2b 100644 --- a/src/index.rs +++ b/src/index.rs @@ -279,7 +279,7 @@ impl Index { #[staticmethod] fn exists(path: &str) -> PyResult { let directory = MmapDirectory::open(path).map_err(to_pyerr)?; - Ok(tv::Index::exists(&directory)) + Ok(tv::Index::exists(&directory).unwrap()) } /// The schema of the current index. diff --git a/src/schemabuilder.rs b/src/schemabuilder.rs index 58b2a275..e9346e3a 100644 --- a/src/schemabuilder.rs +++ b/src/schemabuilder.rs @@ -253,11 +253,18 @@ impl SchemaBuilder { /// /// Args: /// name (str): The name of the field. - fn add_bytes_field(&mut self, name: &str) -> PyResult { + fn add_bytes_field( + &mut self, + name: &str, + stored: bool, + indexed: bool, + fast: bool + ) -> PyResult { let builder = &mut self.builder; + let opts = SchemaBuilder::build_bytes_option(stored, indexed, fast)?; if let Some(builder) = builder.write().unwrap().as_mut() { - builder.add_bytes_field(name); + builder.add_bytes_field(name, opts); } else { return Err(exceptions::PyValueError::new_err( "Schema builder object isn't valid anymore.", @@ -316,4 +323,18 @@ impl SchemaBuilder { Ok(opts) } + + fn build_bytes_option( + stored: bool, + indexed: bool, + fast: bool, + ) -> PyResult { + let opts = schema::BytesOptions::default(); + + let opts = if stored { opts.set_stored() } else { opts }; + let opts = if indexed { opts.set_indexed() } else { opts }; + let opts = if fast { opts.set_fast() } else { opts }; + + Ok(opts) + } } diff --git a/src/searcher.rs b/src/searcher.rs index a3dfe6f7..0c7c4e59 100644 --- a/src/searcher.rs +++ b/src/searcher.rs @@ -1,13 +1,9 @@ #![allow(clippy::new_ret_no_self)] -use crate::document::Document; -use crate::query::Query; -use crate::{get_field, to_pyerr}; -use pyo3::exceptions::PyValueError; -use pyo3::prelude::*; use pyo3::types::{PyDict, PyList, PyTuple}; -use pyo3::PyObjectProtocol; use std::collections::BTreeMap; +use crate::{document::Document, get_field, query::Query, to_pyerr}; +use pyo3::{exceptions::PyValueError, prelude::*, PyObjectProtocol}; use tantivy as tv; use tantivy::collector::{Count, MultiCollector, TopDocs}; @@ -51,7 +47,7 @@ pub(crate) struct SearchResult { #[pyo3(get)] /// How many documents matched the query. Only available if `count` was set /// to true during the search. - count: Option + count: Option, } #[pyproto] @@ -129,8 +125,8 @@ impl Searcher { limit: usize, count: bool, order_by_field: Option<&str>, - offset: usize, facets: Option<&PyDict>, + offset: usize, ) -> PyResult { let mut multicollector = MultiCollector::new(); @@ -162,7 +158,6 @@ impl Searcher { if let Ok(s) = value_element.extract::() { facet_collector.add_facet(&s); } - } let facet_handler = multicollector.add_collector(facet_collector); @@ -255,7 +250,7 @@ impl Searcher { } } } - None => println!("Not found.") + None => println!("Not found."), } } facets_result.insert(key.clone(), facet_vec); From d8bd622f5ebcbb2d8c3c575b6d6820d7fd95ca8f Mon Sep 17 00:00:00 2001 From: Ramon Navarro Bosch Date: Wed, 24 Mar 2021 12:26:03 +0100 Subject: [PATCH 6/8] Missing Cargo --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8d047590..10c6de77 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy" -version = "0.13.2" +version = "0.14.0" readme = "README.md" authors = ["Damir Jelić "] edition = "2018" @@ -12,7 +12,7 @@ crate-type = ["cdylib"] [dependencies] chrono = "0.4.19" -tantivy = "0.13.2" +tantivy = "0.14" itertools = "0.9.0" futures = "0.3.5" From 017b5ae5752f39c127625921f2dbff422f8c2f71 Mon Sep 17 00:00:00 2001 From: Ramon Navarro Bosch Date: Wed, 24 Mar 2021 12:51:02 +0100 Subject: [PATCH 7/8] Format --- src/schemabuilder.rs | 2 +- src/searcher.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/schemabuilder.rs b/src/schemabuilder.rs index e9346e3a..980b0967 100644 --- a/src/schemabuilder.rs +++ b/src/schemabuilder.rs @@ -258,7 +258,7 @@ impl SchemaBuilder { name: &str, stored: bool, indexed: bool, - fast: bool + fast: bool, ) -> PyResult { let builder = &mut self.builder; let opts = SchemaBuilder::build_bytes_option(stored, indexed, fast)?; diff --git a/src/searcher.rs b/src/searcher.rs index 0c7c4e59..7d964073 100644 --- a/src/searcher.rs +++ b/src/searcher.rs @@ -1,9 +1,9 @@ #![allow(clippy::new_ret_no_self)] -use pyo3::types::{PyDict, PyList, PyTuple}; -use std::collections::BTreeMap; use crate::{document::Document, get_field, query::Query, to_pyerr}; +use pyo3::types::{PyDict, PyList, PyTuple}; use pyo3::{exceptions::PyValueError, prelude::*, PyObjectProtocol}; +use std::collections::BTreeMap; use tantivy as tv; use tantivy::collector::{Count, MultiCollector, TopDocs}; From 937436e67a87bdf90a4076d3f4d1017fbf7d9a27 Mon Sep 17 00:00:00 2001 From: Theo Linnemann Date: Wed, 21 Jul 2021 22:30:40 -0400 Subject: [PATCH 8/8] Cargo.toml: Update tantivy crate and pyo3 version --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 10c6de77..f61fe17b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,12 +12,12 @@ crate-type = ["cdylib"] [dependencies] chrono = "0.4.19" -tantivy = "0.14" +tantivy = "0.14.0" itertools = "0.9.0" futures = "0.3.5" [dependencies.pyo3] -version = "0.13.2" +version = "0.14.1" features = ["extension-module"] [package.metadata.maturin]