From 896a5e18ffc31e74640b6b03262586c357aa38ac Mon Sep 17 00:00:00 2001
From: Patrick Meredith
Date: Tue, 17 Dec 2024 17:28:39 -0500
Subject: [PATCH] New PR with Devin's complete changes (#507)

Mostly generated by Devin; the serialization test was updated personally,
and redundant fuzz tests were removed.
---
 .evergreen/config.yml                |  31 +++++-
 .evergreen/run-fuzzer.sh             |  37 ++++++-
 fuzz/Cargo.toml                      |  30 +++++-
 fuzz/fuzz_targets/serialization.rs   |  55 +++++++++++
 fuzz/fuzz_targets/string_handling.rs |  23 +++++
 fuzz/fuzz_targets/type_markers.rs    |  14 +++
 fuzz/generate_corpus.rs              | 143 +++++++++++++++++++++++++++
 src/spec.rs                          |   9 +-
 8 files changed, 330 insertions(+), 12 deletions(-)
 create mode 100644 fuzz/fuzz_targets/serialization.rs
 create mode 100644 fuzz/fuzz_targets/string_handling.rs
 create mode 100644 fuzz/fuzz_targets/type_markers.rs
 create mode 100644 fuzz/generate_corpus.rs

diff --git a/.evergreen/config.yml b/.evergreen/config.yml
index 956e50c0..673b5ffc 100644
--- a/.evergreen/config.yml
+++ b/.evergreen/config.yml
@@ -13,15 +13,18 @@ stepback: true
 command_type: system
 
 # Protect ourself against rogue test case, or curl gone wild, that runs forever
-# 12 minutes is the longest we'll ever run
-exec_timeout_secs: 3600 # 12 minutes is the longest we'll ever run
+# 60 minutes is the longest we'll ever run
+exec_timeout_secs: 3600 # 1 hour total for security-focused fuzzing
 
 # What to do when evergreen hits the timeout (`post:` tasks are run automatically)
 timeout:
   - command: shell.exec
     params:
       script: |
-        ls -la
+        echo "Fuzzing timed out. Collecting any available artifacts..."
+        if [ -d "src/fuzz/artifacts" ]; then
+          tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/
+        fi
 
 functions:
   "fetch source":
@@ -154,7 +157,25 @@
     - command: shell.exec
       params:
         script: |
-          # Nothing needs to be done here
+          # Archive crash artifacts if they exist and contain crashes
+          if [ -d "src/fuzz/artifacts" ] && [ "$(ls -A src/fuzz/artifacts)" ]; then
+            echo "Creating artifacts archive..."
+            tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/
+          else
+            echo "No crashes found in artifacts directory. Skipping archive creation."
+          fi
+    # Upload crash artifacts if they exist
+    - command: s3.put
+      params:
+        aws_key: ${aws_key}
+        aws_secret: ${aws_secret}
+        local_file: ${PROJECT_DIRECTORY}/crash-artifacts.tar.gz
+        remote_file: ${CURRENT_VERSION}/crash-artifacts.tar.gz
+        bucket: mciuploads
+        permissions: public-read
+        content_type: application/x-gzip
+        optional: true
+
 pre:
   - func: "fetch source"
   - func: "install dependencies"
@@ -259,4 +280,4 @@ buildvariants:
   run_on:
     - ubuntu1804-test
   tasks:
-    - name: "wasm-test"
\ No newline at end of file
+    - name: "wasm-test"
diff --git a/.evergreen/run-fuzzer.sh b/.evergreen/run-fuzzer.sh
index 511799cb..b66abf1b 100755
--- a/.evergreen/run-fuzzer.sh
+++ b/.evergreen/run-fuzzer.sh
@@ -6,7 +6,36 @@ set -o errexit
 
 cd fuzz
 
-# each runs for a minute
-cargo +nightly fuzz run deserialize -- -rss_limit_mb=4096 -max_total_time=60
-cargo +nightly fuzz run raw_deserialize -- -rss_limit_mb=4096 -max_total_time=60
-cargo +nightly fuzz run iterate -- -rss_limit_mb=4096 -max_total_time=60
+# Create directories for crashes and corpus
+mkdir -p artifacts
+mkdir -p corpus
+
+# Generate initial corpus if directory is empty
+if [ -z "$(ls -A corpus)" ]; then
+    echo "Generating initial corpus..."
+    cargo run --bin generate_corpus
+fi
+
+# Function to run fuzzer and collect crashes
+run_fuzzer() {
+    target=$1
+    echo "Running fuzzer for $target"
+    # Run fuzzer and redirect crashes to artifacts directory
+    RUST_BACKTRACE=1 cargo +nightly fuzz run $target -- \
+        -rss_limit_mb=4096 \
+        -max_total_time=60 \
+        -artifact_prefix=artifacts/ \
+        -print_final_stats=1 \
+        corpus/
+}
+
+# Run existing targets
+run_fuzzer "deserialize"
+run_fuzzer "raw_deserialize"
+run_fuzzer "iterate"
+
+# Run new security-focused targets
+run_fuzzer "malformed_length"
+run_fuzzer "type_markers"
+run_fuzzer "string_handling"
+run_fuzzer "serialization"
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index 25c60712..abdcffb5 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -1,19 +1,25 @@
-
 [package]
 name = "bson-fuzz"
 version = "0.0.1"
 authors = ["Automatically generated"]
 publish = false
+edition = "2021"
 
 [package.metadata]
 cargo-fuzz = true
 
 [dependencies.bson]
 path = ".."
+
 [dependencies.libfuzzer-sys]
 version = "0.4.0"
 
-# Prevent this from interfering with workspaces
+[dependencies.serde]
+version = "1.0"
+
+[dependencies.serde_json]
+version = "1.0"
+
 [workspace]
 members = ["."]
 
@@ -32,3 +38,23 @@ path = "fuzz_targets/raw_deserialize.rs"
 [[bin]]
 name = "raw_deserialize_utf8_lossy"
 path = "fuzz_targets/raw_deserialize_utf8_lossy.rs"
+
+[[bin]]
+name = "malformed_length"
+path = "fuzz_targets/malformed_length.rs"
+
+[[bin]]
+name = "type_markers"
+path = "fuzz_targets/type_markers.rs"
+
+[[bin]]
+name = "string_handling"
+path = "fuzz_targets/string_handling.rs"
+
+[[bin]]
+name = "serialization"
+path = "fuzz_targets/serialization.rs"
+
+[[bin]]
+name = "generate_corpus"
+path = "generate_corpus.rs"
diff --git a/fuzz/fuzz_targets/serialization.rs b/fuzz/fuzz_targets/serialization.rs
new file mode 100644
index 00000000..e5ba621d
--- /dev/null
+++ b/fuzz/fuzz_targets/serialization.rs
@@ -0,0 +1,55 @@
+#![no_main]
+use bson::{
+    raw::{RawDocument, RawDocumentBuf},
+    Bson,
+    Document,
+};
+use libfuzzer_sys::fuzz_target;
+
+fn compare_docs(doc1: &Document, doc2: &Document) -> bool {
+    if doc1.len() != doc2.len() {
+        return false;
+    }
+    for (key, value) in doc1 {
+        if !doc2.contains_key(key) {
+            return false;
+        }
+        if let Some(val2) = doc2.get(key) {
+            match (value, val2) {
+                (Bson::Double(d1), Bson::Double(d2)) => {
+                    if (!d1.is_nan() || !d2.is_nan()) && d1 != d2 {
+                        return false;
+                    }
+                }
+                (v1, v2) => {
+                    if v1 != v2 {
+                        return false;
+                    }
+                }
+            }
+        }
+    }
+    true
+}
+
+fuzz_target!(|input: &[u8]| {
+    if let Ok(rawdoc) = RawDocument::from_bytes(&input) {
+        if let Ok(doc) = Document::try_from(rawdoc) {
+            let out = RawDocumentBuf::try_from(&doc).unwrap();
+            let out_bytes = out.as_bytes();
+            if input != out_bytes {
+                let reserialized = RawDocument::from_bytes(&out_bytes).unwrap();
+                let reserialized_doc = Document::try_from(reserialized).unwrap();
+                // Ensure that the reserialized document is the same as the original
+                // document; the bytes can differ while still resulting in the same Document.
+                if !compare_docs(&doc, &reserialized_doc) {
+                    panic!(
+                        "Reserialized document is not the same as the original document: {:?} != \
+                         {:?}",
+                        doc, reserialized_doc
+                    );
+                }
+            }
+        }
+    }
+});
diff --git a/fuzz/fuzz_targets/string_handling.rs b/fuzz/fuzz_targets/string_handling.rs
new file mode 100644
index 00000000..090d132f
--- /dev/null
+++ b/fuzz/fuzz_targets/string_handling.rs
@@ -0,0 +1,23 @@
+#![no_main]
+#[macro_use]
+extern crate libfuzzer_sys;
+extern crate bson;
+use bson::{RawBsonRef, RawDocument};
+use std::convert::TryInto;
+
+fuzz_target!(|buf: &[u8]| {
+    if let Ok(doc) = RawDocument::from_bytes(buf) {
+        for elem in doc.iter_elements().flatten() {
+            // Convert to RawBsonRef and check string-related types
+            if let Ok(bson) = elem.try_into() {
+                match bson {
+                    RawBsonRef::String(s) => {
+                        let _ = s.len();
+                        let _ = s.chars().count();
+                    }
+                    _ => {}
+                }
+            }
+        }
+    }
+});
diff --git a/fuzz/fuzz_targets/type_markers.rs b/fuzz/fuzz_targets/type_markers.rs
new file mode 100644
index 00000000..3506b96e
--- /dev/null
+++ b/fuzz/fuzz_targets/type_markers.rs
@@ -0,0 +1,14 @@
+#![no_main]
+#[macro_use]
+extern crate libfuzzer_sys;
+extern crate bson;
+use bson::{RawBsonRef, RawDocument};
+use std::convert::TryInto;
+
+fuzz_target!(|buf: &[u8]| {
+    if let Ok(doc) = RawDocument::from_bytes(buf) {
+        for elem in doc.iter_elements().flatten() {
+            let _: Result<RawBsonRef, _> = elem.try_into();
+        }
+    }
+});
diff --git a/fuzz/generate_corpus.rs b/fuzz/generate_corpus.rs
new file mode 100644
index 00000000..c67c4cfc
--- /dev/null
+++ b/fuzz/generate_corpus.rs
@@ -0,0 +1,143 @@
+use bson::{doc, Bson, Decimal128};
+use std::{
+    fs,
+    io::{Error, ErrorKind},
+    path::Path,
+    str::FromStr,
+};
+
+fn main() -> std::io::Result<()> {
+    let corpus_dir = Path::new("corpus"); // run-fuzzer.sh invokes this from within fuzz/
+    fs::create_dir_all(corpus_dir)?;
+
+    // Generate edge cases for each fuzz target
+    generate_length_edge_cases(corpus_dir)?;
+    generate_type_marker_cases(corpus_dir)?;
+    generate_string_edge_cases(corpus_dir)?;
+    generate_serialization_cases(corpus_dir)?;
+    Ok(())
+}
+
+fn generate_length_edge_cases(dir: &Path) -> std::io::Result<()> {
+    let target_dir = dir.join("malformed_length");
+    fs::create_dir_all(&target_dir)?;
+
+    // Invalid length
+    fs::write(target_dir.join("invalid_len"), vec![4, 5])?;
+
+    // Minimal valid document
+    let min_doc = doc! {};
+    fs::write(
+        target_dir.join("min_doc"),
+        bson::to_vec(&min_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
+    )?;
+
+    // Document with length near i32::MAX
+    let large_doc = doc! { "a": "b".repeat(i32::MAX as usize / 2) };
+    fs::write(
+        target_dir.join("large_doc"),
+        bson::to_vec(&large_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
+    )?;
+
+    Ok(())
+}
+
+fn generate_type_marker_cases(dir: &Path) -> std::io::Result<()> {
+    let target_dir = dir.join("type_markers");
+    fs::create_dir_all(&target_dir)?;
+
+    // Document with all BSON types
+    let all_types = doc! {
+        "double": 1.0f64,
+        "double_nan": f64::NAN,
+        "double_infinity": f64::INFINITY,
+        "double_neg_infinity": f64::NEG_INFINITY,
+        "string": "test",
+        "document": doc! {},
+        "array": vec![1, 2, 3],
+        "binary": Bson::Binary(bson::Binary { subtype: bson::spec::BinarySubtype::Generic, bytes: vec![1, 2, 3] }),
+        "object_id": bson::oid::ObjectId::new(),
+        "bool": true,
+        "date": bson::DateTime::now(),
+        "null": Bson::Null,
+        "regex": Bson::RegularExpression(bson::Regex { pattern: "pattern".into(), options: "i".into() }),
+        "int32": 123i32,
+        "timestamp": bson::Timestamp { time: 12345, increment: 1 },
+        "int64": 123i64,
+        "decimal128_nan": Decimal128::from_str("NaN").unwrap(),
+        "decimal128_infinity": Decimal128::from_str("Infinity").unwrap(),
+        "decimal128_neg_infinity": Decimal128::from_str("-Infinity").unwrap(),
+        "min_key": Bson::MinKey,
+        "max_key": Bson::MaxKey,
+        "undefined": Bson::Undefined
+    };
+    fs::write(
+        target_dir.join("all_types"),
+        bson::to_vec(&all_types).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
+    )?;
+
+    Ok(())
+}
+
+fn generate_string_edge_cases(dir: &Path) -> std::io::Result<()> {
+    let target_dir = dir.join("string_handling");
+    fs::create_dir_all(&target_dir)?;
+
+    // UTF-8 edge cases
+    let utf8_cases = doc! {
+        "empty": "",
+        "null_bytes": "hello\0world",
+        "unicode": "🦀💻🔒",
+        "high_surrogate": "\u{10000}",
+        "invalid_continuation": Bson::Binary(bson::Binary {
+            subtype: bson::spec::BinarySubtype::Generic,
+            bytes: vec![0x80u8, 0x80u8, 0x80u8]
+        }),
+        "overlong": Bson::Binary(bson::Binary {
+            subtype: bson::spec::BinarySubtype::Generic,
+            bytes: vec![0xC0u8, 0x80u8]
+        })
+    };
+    fs::write(
+        target_dir.join("utf8_cases"),
+        bson::to_vec(&utf8_cases).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
+    )?;
+
+    Ok(())
+}
+
+fn generate_serialization_cases(dir: &Path) -> std::io::Result<()> {
+    let target_dir = dir.join("serialization");
+    fs::create_dir_all(&target_dir)?;
+
+    // Deeply nested document
+    let mut nested_doc = doc! {};
+    let mut current = &mut nested_doc;
+    for i in 0..100 {
+        let next_doc = doc! {};
+        current.insert(i.to_string(), next_doc);
+        current = current
+            .get_mut(&i.to_string())
+            .unwrap()
+            .as_document_mut()
+            .unwrap();
+    }
+    fs::write(
+        target_dir.join("nested_doc"),
+        bson::to_vec(&nested_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
+    )?;
+
+    // Document with large binary data
+    let large_binary = doc! {
+        "binary": Bson::Binary(bson::Binary {
+            subtype: bson::spec::BinarySubtype::Generic,
+            bytes: vec![0xFF; 1024 * 1024] // 1MB of data
+        })
+    };
+    fs::write(
+        target_dir.join("large_binary"),
+        bson::to_vec(&large_binary).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
+    )?;
+
+    Ok(())
+}
diff --git a/src/spec.rs b/src/spec.rs
index 04ec7708..e853c150 100644
--- a/src/spec.rs
+++ b/src/spec.rs
@@ -21,7 +21,14 @@
 
 //! Constants derived from the [BSON Specification Version 1.1](http://bsonspec.org/spec.html).
 
-use std::convert::From;
+use std::{convert::From, fmt};
+
+impl fmt::LowerHex for BinarySubtype {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let value: u8 = (*self).into();
+        fmt::LowerHex::fmt(&value, f)
+    }
+}
 
 const ELEMENT_TYPE_FLOATING_POINT: u8 = 0x01;
 const ELEMENT_TYPE_UTF8_STRING: u8 = 0x02;
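
Local repro sketch (illustrative, not part of the patch; assumes cargo-fuzz and a
nightly toolchain are installed, and <hash> stands in for an actual libFuzzer
crash file name):

    cd fuzz
    cargo run --bin generate_corpus    # seed corpus/ with the generated edge cases
    cargo +nightly fuzz run serialization -- -rss_limit_mb=4096 -max_total_time=60 corpus/
    # Failing inputs land in artifacts/ (via -artifact_prefix); replay one directly:
    cargo +nightly fuzz run serialization artifacts/crash-<hash>

The same invocation works for any of the targets wired up in run-fuzzer.sh.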