diff --git a/src/core/codec/doc_values/doc_values_producer.rs b/src/core/codec/doc_values/doc_values_producer.rs index 3bc5635..016ba3d 100644 --- a/src/core/codec/doc_values/doc_values_producer.rs +++ b/src/core/codec/doc_values/doc_values_producer.rs @@ -56,5 +56,3 @@ pub trait DocValuesProducer: Send + Sync { /// Returns an instance optimized for merging. fn get_merge_instance(&self) -> Result>; } - -pub type DocValuesProducerRef = Arc; diff --git a/src/core/index/merge/merge_policy.rs b/src/core/index/merge/merge_policy.rs index ac5effe..35c7d1c 100644 --- a/src/core/index/merge/merge_policy.rs +++ b/src/core/index/merge/merge_policy.rs @@ -636,9 +636,9 @@ impl TieredMergePolicy { next_merges.iter().map(|i| info_seg_bytes[*i]).collect(); info!( - "segment_count={} curr_merge_bytes={} one_merge={:?}", - next_merges_bytes.len(), + "curr_merge_bytes={} segment_count={} one_merge={:?}", curr_merge_bytes, + next_merges_bytes.len(), next_merges_bytes ); let mut segments = Vec::with_capacity(next_merges.len()); diff --git a/src/core/index/mod.rs b/src/core/index/mod.rs index e0c86c1..98047c3 100644 --- a/src/core/index/mod.rs +++ b/src/core/index/mod.rs @@ -33,7 +33,7 @@ pub mod tests { use std::collections::HashMap; use core::codec::doc_values::{ - BinaryDocValues, DocValuesProducerRef, NumericDocValues, SortedDocValues, + BinaryDocValues, DocValuesProducer, NumericDocValues, SortedDocValues, SortedNumericDocValues, SortedSetDocValues, }; use core::codec::field_infos::{FieldInfo, FieldInfos}; @@ -271,7 +271,7 @@ pub mod tests { unimplemented!() } - fn doc_values_reader(&self) -> Result> { + fn doc_values_reader(&self) -> Result>> { unimplemented!() } diff --git a/src/core/index/reader/leaf_reader.rs b/src/core/index/reader/leaf_reader.rs index 92afbb4..1d5a542 100644 --- a/src/core/index/reader/leaf_reader.rs +++ b/src/core/index/reader/leaf_reader.rs @@ -12,8 +12,8 @@ // limitations under the License. use core::codec::doc_values::{ - BinaryDocValues, DocValuesProducerRef, NumericDocValues, SortedDocValues, - SortedNumericDocValues, SortedSetDocValues, + BinaryDocValues, DocValuesProducer, NumericDocValues, SortedDocValues, SortedNumericDocValues, + SortedSetDocValues, }; use core::codec::field_infos::{FieldInfo, FieldInfos}; use core::codec::norms::NormsProducer; @@ -176,7 +176,7 @@ pub trait LeafReader { fn norms_reader(&self) -> Result>; - fn doc_values_reader(&self) -> Result>; + fn doc_values_reader(&self) -> Result>>; fn postings_reader(&self) -> Result; } diff --git a/src/core/index/reader/segment_reader.rs b/src/core/index/reader/segment_reader.rs index 5606ac0..69c1763 100644 --- a/src/core/index/reader/segment_reader.rs +++ b/src/core/index/reader/segment_reader.rs @@ -321,8 +321,6 @@ impl fmt::Display for CfsDirectory { #[derive(Clone)] pub struct SegmentDocValues { dv_producers_by_field: HashMap>, - dv_producers: Vec>, - dv_gens: Vec, } impl SegmentDocValues { @@ -332,76 +330,68 @@ impl SegmentDocValues { infos: Arc, ) -> Self { let mut dv_producers_by_field: HashMap> = HashMap::new(); - let mut dv_producers: Vec> = Vec::new(); - let mut dv_gens: Vec = Vec::new(); - let mut base_doc_values_producer: Option> = None; - for (field, fi) in &infos.by_name { - if fi.doc_values_type == DocValuesType::Null { - continue; - } - let doc_values_gen = fi.dv_gen; - if doc_values_gen == -1 { - // no updated field - if base_doc_values_producer.is_none() { - base_doc_values_producer = Some(Arc::from( - Self::get_doc_values_producer( - doc_values_gen, - si, - dir.clone(), - infos.clone(), - ) - .unwrap(), - )); - dv_gens.push(doc_values_gen); - dv_producers.push(base_doc_values_producer.as_ref().unwrap().clone()); + if let Ok(default_dv_producer) = Self::get_dv_producer(-1, si, dir.clone(), infos.clone()) { + let default_dv_producer: Arc = Arc::from(default_dv_producer); + + for (field, fi) in &infos.by_name { + if fi.doc_values_type == DocValuesType::Null { + continue; + } + + if fi.dv_gen == -1 { + // not updated field + dv_producers_by_field.insert(field.clone(), default_dv_producer.clone()); + } else { + // updated field + if let Ok(field_infos) = FieldInfos::new(vec![fi.as_ref().clone()]) { + let field_infos = Arc::new(field_infos); + if let Ok(dvp) = + Self::get_dv_producer(fi.dv_gen, si, dir.clone(), field_infos) + { + dv_producers_by_field.insert(fi.name.clone(), Arc::from(dvp)); + } else { + error!( + "segment {} get_dv_producer for {} error.", + si.info.name, fi.name + ); + } + } else { + error!( + "segment {} new field_infos for {} error.", + si.info.name, fi.name + ); + } } - dv_producers_by_field.insert( - field.clone(), - base_doc_values_producer.as_ref().unwrap().clone(), - ); - } else { - // updated field - let dvp: Arc = Arc::from( - Self::get_doc_values_producer( - doc_values_gen, - si, - dir.clone(), - Arc::new(FieldInfos::new(vec![fi.as_ref().clone()]).unwrap()), - ) - .unwrap(), - ); - dv_gens.push(doc_values_gen); - dv_producers.push(dvp.clone()); - dv_producers_by_field.insert(fi.name.clone(), dvp.clone()); } + } else { + error!("segment {} get_dv_producer for -1 error.", si.info.name); } + Self { dv_producers_by_field, - dv_producers, - dv_gens, } } - pub fn get_doc_values_producer( + pub fn get_dv_producer( gen: i64, si: &SegmentCommitInfo, dir: Arc, infos: Arc, ) -> Result> { if gen != -1 { - Self::do_get_doc_values_producer( + Self::do_get_dv_producer( si, Arc::clone(&si.info.directory), infos, to_base36(gen as u64), ) } else { - Self::do_get_doc_values_producer(si, dir, infos, "".into()) + Self::do_get_dv_producer(si, dir, infos, "".into()) } } - fn do_get_doc_values_producer( + fn do_get_dv_producer( si: &SegmentCommitInfo, dv_dir: Arc, infos: Arc, @@ -424,6 +414,61 @@ impl SegmentDocValues { } } } + + pub fn get_dv_provider( + dv_producer: Arc, + field_infos: Arc, + ) -> HashMap { + let mut dvs = HashMap::new(); + + for (field_name, field_info) in field_infos.by_name.iter() { + match field_info.doc_values_type { + DocValuesType::Binary => { + if let Ok(cell) = dv_producer.get_binary(field_info) { + dvs.insert( + field_name.to_string(), + DocValuesProviderEnum::Binary(Arc::clone(&cell)), + ); + } + } + DocValuesType::Numeric => { + if let Ok(cell) = dv_producer.get_numeric(field_info) { + dvs.insert( + field_name.to_string(), + DocValuesProviderEnum::Numeric(Arc::clone(&cell)), + ); + } + } + DocValuesType::SortedNumeric => { + if let Ok(cell) = dv_producer.get_sorted_numeric(field_info) { + dvs.insert( + field_name.to_string(), + DocValuesProviderEnum::SortedNumeric(Arc::clone(&cell)), + ); + } + } + DocValuesType::Sorted => { + if let Ok(cell) = dv_producer.get_sorted(field_info) { + dvs.insert( + field_name.to_string(), + DocValuesProviderEnum::Sorted(Arc::clone(&cell)), + ); + } + } + DocValuesType::SortedSet => { + if let Ok(cell) = dv_producer.get_sorted_set(field_info) { + dvs.insert( + field_name.to_string(), + DocValuesProviderEnum::SortedSet(Arc::clone(&cell)), + ); + } + } + _ => {} + } + } + + dvs + } } impl DocValuesProducer for SegmentDocValues { @@ -481,10 +526,10 @@ pub struct SegmentReader { pub core: Arc>, pub is_nrt: bool, pub field_infos: Arc, - doc_values_producer_preload: Arc>>>, - doc_values_producer: ThreadLocalDocValueProducer, - doc_values_local_preload: Arc>>>>, - doc_values_local: CachedThreadLocal>>, + dv_producers_preload: Arc>>>, + dv_producer_local: ThreadLocalDocValueProducer, + dv_providers_preload: Arc>>>>, + dv_provider_local: CachedThreadLocal>>, } unsafe impl Sync for SegmentReader {} @@ -502,85 +547,34 @@ impl SegmentReader { is_nrt: bool, field_infos: Arc, ) -> SegmentReader { - let max_preload_num = num_cpus::get_physical(); - let mut doc_values_producer_preload: Vec> = + let max_preload_num = num_cpus::get_physical() * 3; + let mut dv_producers_preload: Vec> = + Vec::with_capacity(max_preload_num); + let mut dv_providers_preload: Vec>> = Vec::with_capacity(max_preload_num); + for _ in 0..max_preload_num { - if let Some(dv_producer) = SegmentReader::init_doc_values_producer( - core.as_ref(), - si.as_ref(), - field_infos.clone(), - ) { - doc_values_producer_preload.push(dv_producer); + if let Some(dv_producer) = + SegmentReader::get_dv_producer(core.as_ref(), si.as_ref(), field_infos.clone()) + { + dv_producers_preload.push(dv_producer); } } - let dv_producer = SegmentReader::init_doc_values_producer( - core.as_ref(), - si.as_ref(), - field_infos.clone(), - ); - - let doc_values_producer = ThreadLocal::new(); - let mut doc_values_local_preload: Vec>> = - Vec::with_capacity(max_preload_num); - if let Some(dv_producer) = dv_producer { + let dv_producer_local = ThreadLocal::new(); + if let Some(dv_producer) = dv_producers_preload.pop() { for _ in 0..max_preload_num { - let mut dvs = HashMap::new(); - for (field_name, field_info) in field_infos.by_name.iter() { - match field_info.doc_values_type { - DocValuesType::Binary => { - if let Ok(cell) = dv_producer.get_binary(field_info) { - dvs.insert( - field_name.to_string(), - DocValuesProviderEnum::Binary(Arc::clone(&cell)), - ); - } - } - DocValuesType::Numeric => { - if let Ok(cell) = dv_producer.get_numeric(field_info) { - dvs.insert( - field_name.to_string(), - DocValuesProviderEnum::Numeric(Arc::clone(&cell)), - ); - } - } - DocValuesType::SortedNumeric => { - if let Ok(cell) = dv_producer.get_sorted_numeric(field_info) { - dvs.insert( - field_name.to_string(), - DocValuesProviderEnum::SortedNumeric(Arc::clone(&cell)), - ); - } - } - DocValuesType::Sorted => { - if let Ok(cell) = dv_producer.get_sorted(field_info) { - dvs.insert( - field_name.to_string(), - DocValuesProviderEnum::Sorted(Arc::clone(&cell)), - ); - } - } - DocValuesType::SortedSet => { - if let Ok(cell) = dv_producer.get_sorted_set(field_info) { - dvs.insert( - field_name.to_string(), - DocValuesProviderEnum::SortedSet(Arc::clone(&cell)), - ); - } - } - _ => {} - } - } - - doc_values_local_preload.push(RefCell::new(dvs)); + let dvs = + SegmentDocValues::get_dv_provider(dv_producer.clone(), field_infos.clone()); + dv_providers_preload.push(RefCell::new(dvs)); } - - doc_values_producer.get_or(|| Box::new(dv_producer)); + dv_producer_local.get_or(|| Box::new(dv_producer)); } - let doc_values_local = CachedThreadLocal::new(); - doc_values_local.get_or(|| Box::new(RefCell::new(HashMap::new()))); + let dv_provider_local = CachedThreadLocal::new(); + if let Some(dv_preload) = dv_providers_preload.pop() { + dv_provider_local.get_or(|| Box::new(dv_preload)); + } SegmentReader { si, @@ -589,10 +583,10 @@ impl SegmentReader { core, is_nrt, field_infos, - doc_values_producer_preload: Arc::new(RwLock::new(doc_values_producer_preload)), - doc_values_producer, - doc_values_local_preload: Arc::new(RwLock::new(doc_values_local_preload)), - doc_values_local, + dv_producers_preload: Arc::new(RwLock::new(dv_producers_preload)), + dv_producer_local, + dv_providers_preload: Arc::new(RwLock::new(dv_providers_preload)), + dv_provider_local, } } @@ -730,7 +724,7 @@ impl SegmentReader { } impl SegmentReader { - fn init_doc_values_producer( + fn get_dv_producer( core: &SegmentCoreReaders, si: &SegmentCommitInfo, field_infos: Arc, @@ -751,7 +745,7 @@ impl SegmentReader { } else { // simple case, no DocValues updates if let Ok(dv_producer) = - SegmentDocValues::get_doc_values_producer(-1_i64, &si, dir, field_infos) + SegmentDocValues::get_dv_producer(-1_i64, &si, dir, field_infos) { Some(Arc::from(dv_producer)) } else { @@ -760,61 +754,32 @@ impl SegmentReader { } } - fn init_local_doc_values_producer(&self) -> Result<()> { + fn init_local_dv_producer(&self) -> Result<()> { if self.field_infos.has_doc_values { - if self.doc_values_producer.get().is_some() { + if self.dv_producer_local.get().is_some() { return Ok(()); } - if let Some(dv_producer) = self.doc_values_producer_preload.write()?.pop() { - self.doc_values_producer.get_or(|| Box::new(dv_producer)); + if let Some(dv_producer) = self.dv_producers_preload.write()?.pop() { + self.dv_producer_local.get_or(|| Box::new(dv_producer)); return Ok(()); } - if self.si.has_field_updates() { - let dv_producer = if let Some(ref cfs_dir) = self.core.cfs_reader { - SegmentDocValues::new( - &self.si, - Arc::clone(cfs_dir), - Arc::clone(&self.field_infos), - ) - } else { - SegmentDocValues::new( - &self.si, - Arc::clone(&self.si.info.directory), - Arc::clone(&self.field_infos), - ) - }; - self.doc_values_producer - .get_or(|| Box::new(Arc::from(dv_producer))); - } else { - let dv_producer = if let Some(ref cfs_dir) = self.core.cfs_reader { - SegmentDocValues::get_doc_values_producer( - -1_i64, - &self.si, - Arc::clone(cfs_dir), - Arc::clone(&self.field_infos), - )? - } else { - SegmentDocValues::get_doc_values_producer( - -1_i64, - &self.si, - Arc::clone(&self.si.info.directory), - Arc::clone(&self.field_infos), - )? - }; - - self.doc_values_producer - .get_or(|| Box::new(Arc::from(dv_producer))); + if let Some(dv_producer) = SegmentReader::get_dv_producer( + self.core.as_ref(), + self.si.as_ref(), + self.field_infos.clone(), + ) { + self.dv_producer_local.get_or(|| Box::new(dv_producer)); } } Ok(()) } - fn init_field_infos( - si: &SegmentCommitInfo, - core: &SegmentCoreReaders, + fn init_field_infos( + si: &SegmentCommitInfo, + core: &SegmentCoreReaders, ) -> Result> { if !si.has_field_updates() { Ok(Arc::clone(&core.core_field_infos)) @@ -831,11 +796,11 @@ impl SegmentReader { } } - fn doc_values_local(&self) -> Result<&RefCell>> { - self.init_local_doc_values_producer()?; + fn dv_provider_local(&self) -> Result<&RefCell>> { + self.init_local_dv_producer()?; - Ok(self.doc_values_local.get_or(|| { - if let Some(dv_preload) = self.doc_values_local_preload.write().unwrap().pop() { + Ok(self.dv_provider_local.get_or(|| { + if let Some(dv_preload) = self.dv_providers_preload.write().unwrap().pop() { Box::new(dv_preload) } else { Box::new(RefCell::new(HashMap::new())) @@ -939,7 +904,7 @@ where fn get_numeric_doc_values(&self, field: &str) -> Result> { match self - .doc_values_local()? + .dv_provider_local()? .borrow_mut() .entry(String::from(field)) { @@ -951,8 +916,8 @@ where ))), }, Entry::Vacant(v) => match self.get_dv_field(field, DocValuesType::Numeric) { - Some(fi) if self.doc_values_producer.get().is_some() => { - let dv_producer = self.doc_values_producer.get().unwrap(); + Some(fi) if self.dv_producer_local.get().is_some() => { + let dv_producer = self.dv_producer_local.get().unwrap(); let cell = dv_producer.get_numeric(fi)?; v.insert(DocValuesProviderEnum::Numeric(Arc::clone(&cell))); cell.get() @@ -967,7 +932,7 @@ where fn get_binary_doc_values(&self, field: &str) -> Result> { match self - .doc_values_local()? + .dv_provider_local()? .borrow_mut() .entry(String::from(field)) { @@ -979,8 +944,8 @@ where ))), }, Entry::Vacant(v) => match self.get_dv_field(field, DocValuesType::Binary) { - Some(fi) if self.doc_values_producer.get().is_some() => { - let dv_producer = self.doc_values_producer.get().unwrap(); + Some(fi) if self.dv_producer_local.get().is_some() => { + let dv_producer = self.dv_producer_local.get().unwrap(); let dv = dv_producer.get_binary(fi)?; v.insert(DocValuesProviderEnum::Binary(Arc::clone(&dv))); dv.get() @@ -995,7 +960,7 @@ where fn get_sorted_doc_values(&self, field: &str) -> Result> { match self - .doc_values_local()? + .dv_provider_local()? .borrow_mut() .entry(String::from(field)) { @@ -1007,8 +972,8 @@ where ))), }, Entry::Vacant(v) => match self.get_dv_field(field, DocValuesType::Sorted) { - Some(fi) if self.doc_values_producer.get().is_some() => { - let dv_producer = self.doc_values_producer.get().unwrap(); + Some(fi) if self.dv_producer_local.get().is_some() => { + let dv_producer = self.dv_producer_local.get().unwrap(); let dv = dv_producer.get_sorted(fi)?; v.insert(DocValuesProviderEnum::Sorted(Arc::clone(&dv))); dv.get() @@ -1026,7 +991,7 @@ where field: &str, ) -> Result> { match self - .doc_values_local()? + .dv_provider_local()? .borrow_mut() .entry(String::from(field)) { @@ -1038,8 +1003,8 @@ where ))), }, Entry::Vacant(v) => match self.get_dv_field(field, DocValuesType::SortedNumeric) { - Some(fi) if self.doc_values_producer.get().is_some() => { - let dv_producer = self.doc_values_producer.get().unwrap(); + Some(fi) if self.dv_producer_local.get().is_some() => { + let dv_producer = self.dv_producer_local.get().unwrap(); let dv = dv_producer.get_sorted_numeric(fi)?; let cell = dv; v.insert(DocValuesProviderEnum::SortedNumeric(Arc::clone(&cell))); @@ -1055,7 +1020,7 @@ where fn get_sorted_set_doc_values(&self, field: &str) -> Result> { match self - .doc_values_local()? + .dv_provider_local()? .borrow_mut() .entry(String::from(field)) { @@ -1067,8 +1032,8 @@ where ))), }, Entry::Vacant(v) => match self.get_dv_field(field, DocValuesType::SortedSet) { - Some(fi) if self.doc_values_producer.get().is_some() => { - let dv_producer = self.doc_values_producer.get().unwrap(); + Some(fi) if self.dv_producer_local.get().is_some() => { + let dv_producer = self.dv_producer_local.get().unwrap(); let dv = dv_producer.get_sorted_set(fi)?; let cell = dv; v.insert(DocValuesProviderEnum::SortedSet(Arc::clone(&cell))); @@ -1099,9 +1064,9 @@ where match self.field_infos.field_info_by_name(field) { Some(fi) if fi.doc_values_type != DocValuesType::Null - && self.doc_values_producer.get().is_some() => + && self.dv_producer_local.get().is_some() => { - let dv_producer = self.doc_values_producer.get().unwrap(); + let dv_producer = self.dv_producer_local.get().unwrap(); dv_producer.get_docs_with_field(fi) } @@ -1147,8 +1112,8 @@ where } fn doc_values_reader(&self) -> Result>> { - self.init_local_doc_values_producer()?; - Ok(self.doc_values_producer.get().map(Arc::clone)) + self.init_local_dv_producer()?; + Ok(self.dv_producer_local.get().map(Arc::clone)) } fn postings_reader(&self) -> Result { diff --git a/src/core/search/query/boolean_query.rs b/src/core/search/query/boolean_query.rs index 5445f1c..0baf777 100644 --- a/src/core/search/query/boolean_query.rs +++ b/src/core/search/query/boolean_query.rs @@ -223,7 +223,8 @@ impl Weight for BooleanWeight { } match scorers.len() { 0 => None, - 1 => Some(scorers.remove(0)), + // min_should_match + // 1 => Some(scorers.remove(0)), _ => Some(Box::new(DisjunctionSumScorer::new( scorers, self.needs_scores, diff --git a/src/core/search/query/phrase_query.rs b/src/core/search/query/phrase_query.rs index 6650b49..4a99d06 100644 --- a/src/core/search/query/phrase_query.rs +++ b/src/core/search/query/phrase_query.rs @@ -291,7 +291,14 @@ impl Weight for PhraseWeight { let mut total_match_cost = 0f32; for i in 0..self.terms.len() { - term_iter.seek_exact(self.terms[i].bytes.as_ref())?; + if !term_iter.seek_exact(self.terms[i].bytes.as_ref())? { + return Err(format!( + "term={} does not exist", + String::from_utf8(self.terms[i].bytes.clone()).unwrap() + ) + .into()); + } + total_match_cost += self.term_positions_cost(&mut term_iter)?; postings_freqs.push(PostingsAndFreq::new( diff --git a/src/core/search/scorer/disjunction_scorer.rs b/src/core/search/scorer/disjunction_scorer.rs index 6da3644..5f1371b 100644 --- a/src/core/search/scorer/disjunction_scorer.rs +++ b/src/core/search/scorer/disjunction_scorer.rs @@ -18,6 +18,8 @@ use core::util::{DisiPriorityQueue, DocId}; use error::Result; use std::f32; +pub const DEFAULT_MIN_SHOULD_MATCH: i32 = 1; + /// A Scorer for OR like queries, counterpart of `ConjunctionScorer`. pub struct DisjunctionSumScorer { sub_scorers: SubScorers, @@ -32,11 +34,11 @@ impl DisjunctionSumScorer { needs_scores: bool, min_should_match: i32, ) -> DisjunctionSumScorer { - assert!(children.len() > 1); + debug_assert!(children.len() > 0); let cost = children.iter().map(|w| w.cost()).sum(); - let sub_scorers = if children.len() < 10 || min_should_match > 1 { + let sub_scorers = if children.len() < 10 || min_should_match > DEFAULT_MIN_SHOULD_MATCH { SubScorers::SQ(SimpleQueue::new(children)) } else { SubScorers::DPQ(DisiPriorityQueue::new(children)) @@ -87,8 +89,13 @@ impl DocIterator for DisjunctionSumScorer { } fn approximate_next(&mut self) -> Result { - self.sub_scorers - .approximate_next(Some(self.min_should_match)) + let min_should_match = if self.min_should_match > DEFAULT_MIN_SHOULD_MATCH { + Some(self.min_should_match) + } else { + None + }; + + self.sub_scorers.approximate_next(min_should_match) } fn approximate_advance(&mut self, target: DocId) -> Result { @@ -114,7 +121,7 @@ impl DisjunctionMaxScorer { tie_breaker_multiplier: f32, needs_scores: bool, ) -> DisjunctionMaxScorer { - assert!(children.len() > 1); + debug_assert!(children.len() > 0); let cost = children.iter().map(|w| w.cost()).sum(); @@ -286,28 +293,42 @@ impl SubScorers { } fn approximate_next(&mut self, min_should_match: Option) -> Result { - let min_should_match = min_should_match.unwrap_or(0); - match self { SubScorers::SQ(sq) => { + let min_should_match = min_should_match.unwrap_or(DEFAULT_MIN_SHOULD_MATCH); + loop { - // curr_doc = current min_doc, (not -1) + if sq.curr_doc == NO_MORE_DOCS { + return Ok(sq.curr_doc); + } + + // curr_doc begin with -1 let curr_doc = sq.curr_doc; let mut min_doc = NO_MORE_DOCS; - let mut should_count = 0; for s in sq.scorers.iter_mut() { if s.doc_id() == curr_doc { - should_count += 1; s.approximate_next()?; } min_doc = min_doc.min(s.doc_id()); } - sq.curr_doc = min_doc; - if should_count >= min_should_match || sq.curr_doc == NO_MORE_DOCS { - return Ok(sq.curr_doc); + + if min_should_match > DEFAULT_MIN_SHOULD_MATCH { + let mut should_count = 0; + + for s in sq.scorers.iter_mut() { + if s.doc_id() == min_doc { + should_count += 1; + } + } + + if should_count < min_should_match { + continue; + } } + + return Ok(sq.curr_doc); } } SubScorers::DPQ(dbq) => { diff --git a/src/core/search/scorer/req_opt_scorer.rs b/src/core/search/scorer/req_opt_scorer.rs index 61b65b1..c65adb5 100644 --- a/src/core/search/scorer/req_opt_scorer.rs +++ b/src/core/search/scorer/req_opt_scorer.rs @@ -44,7 +44,7 @@ impl Scorer for ReqOptScorer { let mut score = self.req_scorer.score()?; if self.scores_num > OPT_SCORE_THRESHOLD { - if score < self.scores_sum / self.scores_num as f32 { + if 2.0 * score < self.scores_sum / self.scores_num as f32 { return Ok(score); } }