diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 2323806d12..f2e5477501 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -1651,6 +1651,7 @@ mod tests { force_end_merge: bool, ) -> crate::Result { let mut schema_builder = schema::Schema::builder(); + let json_field = schema_builder.add_json_field("json", FAST | TEXT | STORED); let ip_field = schema_builder.add_ip_addr_field("ip", FAST | INDEXED | STORED); let ips_field = schema_builder .add_ip_addr_field("ips", IpAddrOptions::default().set_fast().set_indexed()); @@ -1729,7 +1730,9 @@ mod tests { id_field=>id, ))?; } else { + let json = json!({"date1": format!("2022-{id}-01T00:00:01Z"), "date2": format!("{id}-05-01T00:00:01Z"), "id": id, "ip": ip.to_string()}); index_writer.add_document(doc!(id_field=>id, + json_field=>json, bytes_field => id.to_le_bytes().as_slice(), id_opt_field => id, ip_field => ip, diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 87bc4c8c8c..8612f66c55 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -605,6 +605,10 @@ impl IndexMerger { segment_postings.positions(&mut positions_buffer); segment_postings.term_freq() } else { + // The positions_buffer may contain positions from the previous term. + // Whether positions exist depends on the value type in JSON fields. 
+ // See https://github.com/quickwit-oss/tantivy/issues/2283 + positions_buffer.clear(); 0u32 }; diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 1888f3b47e..c0bd8d4403 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -879,6 +879,31 @@ mod tests { assert_eq!(searcher.search(&phrase_query, &Count).unwrap(), 0); } + #[test] + fn test_json_term_with_numeric_merge_panic_regression_bug_2283() { + // Regression test for https://github.com/quickwit-oss/tantivy/issues/2283 + let mut schema_builder = Schema::builder(); + let json = schema_builder.add_json_field("json", TEXT); + let schema = schema_builder.build(); + let index = Index::create_in_ram(schema); + let mut writer = index.writer_for_tests().unwrap(); + let doc = json!({"field": "a"}); + writer.add_document(doc!(json=>doc)).unwrap(); + writer.commit().unwrap(); + let doc = json!({"field": "a", "id": 1}); + writer.add_document(doc!(json=>doc.clone())).unwrap(); + writer.commit().unwrap(); + + // Force a merge of all searchable segments + writer.wait_merging_threads().unwrap(); + let mut index_writer: IndexWriter = index.writer_for_tests().unwrap(); + let segment_ids = index + .searchable_segment_ids() + .expect("Searchable segments failed."); + index_writer.merge(&segment_ids).wait().unwrap(); + assert!(index_writer.wait_merging_threads().is_ok()); + } + #[test] fn test_bug_regression_1629_position_when_array_with_a_field_value_that_does_not_contain_any_token( ) {