This repository has been archived by the owner on Apr 4, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 81
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a fuzz test for index operations (clear, import, delete, settings)
It is very limited so far. It is meant to catch bugs with soft-deleted document ids.
- Loading branch information
Showing
4 changed files
with
356 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,351 @@ | ||
// Things tested by this fuzz test | ||
// | ||
// - A few different document identifiers only | ||
// - Simple setting updates (searchable and filterable attributes only) | ||
// - Document Deletion (given existing or nonexistent external document ids)
// - Clear Documents | ||
// - Batched document imports | ||
// - Update/Replacement of existing documents
// - Each operation with and without soft deletion | ||
// - Empty document imports | ||
// - No crash should ever happen | ||
|
||
// A small sample of what isn't tested: | ||
// | ||
// - The correctness of the indexing operations | ||
// - Indexing mistakes that happen when many different documents are inserted | ||
// - Long batches of document imports | ||
// - Nested fields (not tested well anyway) | ||
// - Any search result | ||
// - Arbitrary document contents | ||
// (instead, the components of the documents are pre-written manually) | ||
// - Index creation / Deletion | ||
// - Autogenerated docids | ||
// - Indexing for geosearch | ||
// - Documents with too many field ids or too many words in a field id | ||
// - Anything related to the prefix databases | ||
// - Incorrect setting updates | ||
// - The logic that chooses between soft and hard deletion | ||
// (the choice is instead set manually for each operation) | ||
// - Different IndexerConfig parameters | ||
|
||
// Efficiency tips: | ||
// | ||
// - Use a RAM disk (see https://stackoverflow.com/questions/46224103/create-apfs-ram-disk-on-macos-high-sierra) | ||
// - change the value of the TMPDIR environment variable to a folder in the RAM disk | ||
|
||
// Quality: | ||
// - finds issue 2945 if either of the two fixes below is missing (within a few minutes)
// - issue 2945: https://github.com/meilisearch/meilisearch/issues/2945 | ||
// - fix 1: https://github.com/meilisearch/milli/pull/723 | ||
// - fix 2: https://github.com/meilisearch/milli/pull/734 | ||
// - but doesn't detect anything wrong if this fix is not included: https://github.com/meilisearch/milli/pull/690 | ||
// - because it doesn't cause any crash, I think | ||
// - each fuzz test iteration is quite slow | ||
// - for this fuzz test in particular, it is good to let it run for a few hours, or even a day | ||
|
||
use std::hash::Hash; | ||
use std::sync::LazyLock; | ||
|
||
use fuzzcheck::mutators::integer_within_range::U8WithinRangeMutator; | ||
use fuzzcheck::mutators::option::OptionMutator; | ||
use fuzzcheck::mutators::unique::UniqueMutator; | ||
use fuzzcheck::mutators::vector::VecMutator; | ||
use fuzzcheck::DefaultMutator; | ||
use heed::{EnvOpenOptions, RwTxn}; | ||
use serde::{Deserialize, Serialize}; | ||
use tempfile::TempDir; | ||
|
||
use super::{ | ||
ClearDocuments, DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings, | ||
}; | ||
use crate::Index; | ||
|
||
/// The list of document identifiers that we choose to test | ||
static DOCUMENT_IDENTIFIERS: LazyLock<Vec<serde_json::Value>> = LazyLock::new(|| { | ||
let mut ids = vec![]; | ||
for i in 0..10 { | ||
ids.push(serde_json::json!(i)); | ||
ids.push(serde_json::json!(format!("{i}"))); | ||
} | ||
ids.push(serde_json::json!("complex-ID-1_2")); | ||
ids.push(serde_json::json!("1-2-3-4")); | ||
ids.push(serde_json::json!("invalidsupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocious")); | ||
ids.push(serde_json::json!("invalid.id")); | ||
ids | ||
}); | ||
|
||
/// The list of field values that we choose to test | ||
static FIELD_VALUES: LazyLock<Vec<serde_json::Value>> = LazyLock::new(|| { | ||
let mut vals = vec![]; | ||
for i in 0..10i32 { | ||
vals.push(serde_json::json!(i)); | ||
vals.push(serde_json::json!((i as f64) / 3.4)); | ||
vals.push(serde_json::json!(111.1_f32.powi(i))); | ||
vals.push(serde_json::json!(format!("{i}"))); | ||
vals.push(serde_json::json!([i - 1, format!("{i}"), i + 1, format!("{}", i - 1), i - 2])); | ||
vals.push(serde_json::json!(format!("{}", "a".repeat(i as usize)))); | ||
} | ||
vals.push(serde_json::json!({ "nested": ["value", { "nested": ["value", "value", "the quick brown fox jumps over the lazy dog, wow!"] }], "value": 0})); | ||
vals.push(serde_json::json!("the quick brown fox jumps over the lazy dog, wow!")); | ||
vals.push(serde_json::json!("the quick brown supercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocioussupercalifragilisticexpialidocious fox jumps over the lazy dog")); | ||
vals.push(serde_json::json!({ "lat": 23.0, "lon": 22.1 })); | ||
vals.push(serde_json::json!({ "lat": 23.0, "lon": 22.1, "other": 10.0 })); | ||
vals.push(serde_json::json!({ "lat": -23.0, "lon": -22.1 })); | ||
vals.push(serde_json::json!({ "lat": 93.0, "lon": 22.1 })); | ||
vals.push(serde_json::json!({ "lat": 90.0, "lon": 221.1 })); | ||
vals | ||
}); | ||
/// The list of field keys that we choose to test.
///
/// For every top-level field name we also generate dotted sub-paths, so
/// nested-field handling gets some coverage.
static FIELD_KEYS: LazyLock<Vec<String>> = LazyLock::new(|| {
    const ROOTS: [&str; 4] = ["identifier", "field1", "field2", "_geo"];
    const SUBPATHS: [&str; 8] = [
        "nested",
        "value",
        "nested.value",
        "nested.value.nested",
        "_geo",
        "lat",
        "lon",
        "other",
    ];
    let mut keys = Vec::with_capacity(ROOTS.len() * (SUBPATHS.len() + 1));
    for root in ROOTS {
        keys.push(root.to_owned());
        keys.extend(SUBPATHS.iter().map(|sub| format!("{root}.{sub}")));
    }
    keys
});
fn document_identifier(i: u8) -> serde_json::Value { | ||
DOCUMENT_IDENTIFIERS[i as usize].clone() | ||
} | ||
fn field_key(i: u8) -> String { | ||
FIELD_KEYS[i as usize].clone() | ||
} | ||
fn field_value(i: u8) -> serde_json::Value { | ||
FIELD_VALUES[i as usize].clone() | ||
} | ||
fn document_identifier_index_mutator() -> U8WithinRangeMutator { | ||
U8WithinRangeMutator::new(..DOCUMENT_IDENTIFIERS.len() as u8) | ||
} | ||
fn field_key_index_mutator() -> U8WithinRangeMutator { | ||
U8WithinRangeMutator::new(..FIELD_KEYS.len() as u8) | ||
} | ||
fn field_value_index_mutator() -> U8WithinRangeMutator { | ||
U8WithinRangeMutator::new(..FIELD_VALUES.len() as u8) | ||
} | ||
|
||
/// One index operation the fuzzer can apply (dispatched by [`apply_operation`]).
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
enum Operation {
    /// Update the index settings (searchable attributes only).
    SettingsUpdate(SettingsUpdate),
    /// Import up to two batches of documents in a single indexing operation.
    DocumentImport(DocumentImport),
    /// Delete documents by external document id.
    DocumentDeletion(DocumentDeletion),
    /// Remove all documents from the index.
    Clear,
}
/// How an import treats documents whose external id already exists.
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
enum Method {
    /// Maps to `IndexDocumentsMethod::UpdateDocuments`.
    Update,
    /// Maps to `IndexDocumentsMethod::ReplaceDocuments`.
    Replace,
}
/// A batched document import operation (see [`apply_document_import`]).
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
struct DocumentImport {
    // Forwarded to `IndexDocumentsConfig::disable_soft_deletion`.
    disable_soft_deletion: bool,
    // Update vs. replace semantics for already-existing documents.
    method: Method,
    // The (up to two) batches of documents added by this operation.
    documents: DocumentImportBatch,
}
/// A settings-update operation (see [`apply_settings_update`]).
///
/// Only the searchable attributes are fuzzed; filterable attributes were
/// deliberately left out because they slow the fuzzer down considerably.
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
struct SettingsUpdate {
    // Adding filterable fields slows down the fuzzer a lot
    // #[field_mutator(OptionMutator<Vec<u8>, VecMutator<u8, U8WithinRangeMutator>> = {
    //     OptionMutator::new(VecMutator::new(field_key_index_mutator(), 0..=10))
    // })]
    // filterable_fields: Option<Vec<u8>>,
    // `None` resets the searchable fields; `Some` holds indices into
    // `FIELD_KEYS` naming the fields to make searchable.
    #[field_mutator(OptionMutator<Vec<u8>, VecMutator<u8, U8WithinRangeMutator>> = {
        OptionMutator::new(VecMutator::new(field_key_index_mutator(), 0..=10))
    })]
    searchable_fields: Option<Vec<u8>>,
}
/// A document-deletion operation (see [`apply_document_deletion`]).
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
struct DocumentDeletion {
    // Forwarded to `DeleteDocuments::disable_soft_deletion`.
    disable_soft_deletion: bool,
    // Indices into `DOCUMENT_IDENTIFIERS`; the referenced ids may or may not
    // exist in the index — both cases are exercised on purpose.
    #[field_mutator(VecMutator<u8, U8WithinRangeMutator> = {
        VecMutator::new(document_identifier_index_mutator(), 0..=10)
    })]
    external_document_ids: Vec<u8>,
}
/// A compact description of one document.
///
/// Fields hold indices into the static test-value lists rather than the
/// values themselves, which keeps the fuzzer's search space small.
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
struct Document {
    /// Index into [`DOCUMENT_IDENTIFIERS`]; becomes the `identifier` field.
    #[field_mutator(U8WithinRangeMutator = { document_identifier_index_mutator() })]
    identifier: u8,
    /// Optional index into [`FIELD_VALUES`]; `None` omits the `field1` field.
    #[field_mutator(OptionMutator<u8, U8WithinRangeMutator> = {
        OptionMutator::new(field_value_index_mutator())
    })]
    field1: Option<u8>,
    /// Optional index into [`FIELD_VALUES`]; `None` omits the `field2` field.
    #[field_mutator(OptionMutator<u8, U8WithinRangeMutator> = {
        OptionMutator::new(field_value_index_mutator())
    })]
    field2: Option<u8>,
}
/// Two consecutive batches of documents added within one indexing operation
/// (both are passed to `add_documents` before a single `execute`).
#[derive(Debug, Clone, Serialize, Deserialize, DefaultMutator, PartialEq, Eq, Hash)]
struct DocumentImportBatch {
    // First batch: up to 10 documents (may be empty).
    #[field_mutator(VecMutator<Document, DocumentMutator> = {
        VecMutator::new(Document::default_mutator(), 0..=10)
    })]
    docs1: Vec<Document>,
    // Second batch: up to 5 documents (may be empty).
    #[field_mutator(VecMutator<Document, DocumentMutator> = {
        VecMutator::new(Document::default_mutator(), 0..=5)
    })]
    docs2: Vec<Document>,
}
|
||
fn apply_document_deletion<'i>( | ||
wtxn: &mut RwTxn<'i, '_>, | ||
index: &'i Index, | ||
deletion: &DocumentDeletion, | ||
) { | ||
let DocumentDeletion { disable_soft_deletion, external_document_ids } = deletion; | ||
let mut builder = DeleteDocuments::new(wtxn, index).unwrap(); | ||
builder.disable_soft_deletion(*disable_soft_deletion); | ||
for id in external_document_ids { | ||
let id = document_identifier(*id); | ||
let id = match id { | ||
serde_json::Value::Number(n) => format!("{n}"), | ||
serde_json::Value::String(s) => s, | ||
_ => panic!(), | ||
}; | ||
let _ = builder.delete_external_id(id.as_str()); | ||
} | ||
builder.execute().unwrap(); | ||
} | ||
|
||
fn apply_document_import<'i>(wtxn: &mut RwTxn<'i, '_>, index: &'i Index, import: &DocumentImport) { | ||
let DocumentImport { | ||
disable_soft_deletion, | ||
method, | ||
documents: DocumentImportBatch { docs1, docs2 }, | ||
} = import; | ||
let indexer_config = IndexerConfig::default(); | ||
let mut builder = IndexDocuments::new( | ||
wtxn, | ||
index, | ||
&indexer_config, | ||
IndexDocumentsConfig { | ||
update_method: match method { | ||
Method::Update => super::IndexDocumentsMethod::UpdateDocuments, | ||
Method::Replace => super::IndexDocumentsMethod::ReplaceDocuments, | ||
}, | ||
disable_soft_deletion: *disable_soft_deletion, | ||
autogenerate_docids: false, | ||
..IndexDocumentsConfig::default() | ||
}, | ||
|_| {}, | ||
|| false, | ||
) | ||
.unwrap(); | ||
|
||
let make_real_docs = |docs: &Vec<Document>| { | ||
docs.iter() | ||
.map(|doc| { | ||
let Document { identifier, field1, field2 } = doc; | ||
let mut object = crate::Object::new(); | ||
let identifier = document_identifier(*identifier); | ||
object.insert("identifier".to_owned(), serde_json::json!(identifier)); | ||
if let Some(field1) = field1 { | ||
let field1 = field_value(*field1); | ||
object.insert("field1".to_owned(), field1); | ||
} | ||
if let Some(field2) = field2 { | ||
let field2 = field_value(*field2); | ||
object.insert("field2".to_owned(), field2); | ||
} | ||
object | ||
}) | ||
.collect::<Vec<_>>() | ||
}; | ||
|
||
let docs1 = make_real_docs(docs1); | ||
|
||
let (new_builder, _user_error) = builder.add_documents(documents!(docs1)).unwrap(); | ||
builder = new_builder; | ||
|
||
let docs2 = make_real_docs(docs2); | ||
|
||
let (new_builder, _user_error) = builder.add_documents(documents!(docs2)).unwrap(); | ||
builder = new_builder; | ||
|
||
let _ = builder.execute().unwrap(); | ||
} | ||
|
||
fn apply_settings_update<'i>( | ||
wtxn: &mut RwTxn<'i, '_>, | ||
index: &'i Index, | ||
settings: &SettingsUpdate, | ||
) { | ||
let SettingsUpdate { searchable_fields /* , filterable_fields */ } = settings; | ||
let indexer_config = IndexerConfig::default(); | ||
let mut settings = Settings::new(wtxn, index, &indexer_config); | ||
// match filterable_fields { | ||
// Some(fields) => { | ||
// let fields = fields.iter().map(|f| field_key(*f)).collect(); | ||
// settings.set_filterable_fields(fields); | ||
// } | ||
// None => settings.reset_filterable_fields(), | ||
// } | ||
match searchable_fields { | ||
Some(fields) => { | ||
let fields = fields.iter().map(|f| field_key(*f)).collect(); | ||
settings.set_searchable_fields(fields); | ||
} | ||
None => settings.reset_searchable_fields(), | ||
} | ||
settings.execute(|_| {}, || false).unwrap(); | ||
} | ||
|
||
fn apply_operation<'i>(wtxn: &mut RwTxn<'i, '_>, index: &'i Index, operation: &Operation) { | ||
match operation { | ||
Operation::SettingsUpdate(settings) => apply_settings_update(wtxn, index, settings), | ||
Operation::DocumentImport(import) => apply_document_import(wtxn, index, import), | ||
Operation::DocumentDeletion(deletion) => apply_document_deletion(wtxn, index, deletion), | ||
Operation::Clear => { | ||
let builder = ClearDocuments::new(wtxn, index); | ||
let _result = builder.execute().unwrap(); | ||
} | ||
} | ||
} | ||
|
||
/// Fuzz entry point: builds a fresh index with `identifier` as primary key,
/// then lets fuzzcheck apply arbitrary sequences of operations to it.
/// Every sequence runs inside one write transaction that is aborted at the
/// end, so iterations are independent. Any panic counts as a failure.
#[test]
fn fuzz() {
    // Fix: the original hard-coded `TempDir::new_in("/Volumes/Ramdisk")`,
    // which panics on any machine without that macOS RAM disk. Use it when
    // it exists (see the efficiency tips at the top of this file), otherwise
    // fall back to the system temporary directory.
    let ramdisk = std::path::Path::new("/Volumes/Ramdisk");
    let tempdir =
        if ramdisk.is_dir() { TempDir::new_in(ramdisk).unwrap() } else { TempDir::new().unwrap() };

    let mut options = EnvOpenOptions::new();
    // Roughly 4 GB of LMDB map size; plenty for the small fuzzed documents.
    options.map_size(4096 * 1000 * 1000);

    // Create the index once and set the primary key up front, so every fuzz
    // iteration starts from the same committed state.
    let index = {
        let index = Index::new(options, tempdir.path()).unwrap();
        let mut wtxn = index.write_txn().unwrap();
        let indexer_config = IndexerConfig::default();
        let mut settings = Settings::new(&mut wtxn, &index, &indexer_config);
        settings.set_primary_key("identifier".to_owned());
        settings.execute(|_| {}, || false).unwrap();
        wtxn.commit().unwrap();
        index
    };

    let result = fuzzcheck::fuzz_test(move |operations: &[Operation]| {
        let mut wtxn = index.write_txn().unwrap();
        for operation in operations {
            apply_operation(&mut wtxn, &index, operation);
        }
        // Abort rather than commit: nothing persists, so each iteration
        // observes the same initial index state.
        wtxn.abort().unwrap();
    })
    // We use a bloom filter (through UniqueMutator) to prevent the same test input from being tested too many times
    .mutator(UniqueMutator::new(VecMutator::new(Operation::default_mutator(), 0..=20), |x| x))
    .serde_serializer()
    .default_sensor_and_pool()
    .arguments_from_cargo_fuzzcheck()
    .launch();
    assert!(!result.found_test_failure);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters