New PR with Devin's complete changes (#507)
Mostly generated by Devin; I updated the serialization test personally and removed the redundant fuzz tests.
pmeredit authored Dec 17, 2024
1 parent e0aab6f commit 896a5e1
Showing 8 changed files with 330 additions and 12 deletions.
31 changes: 26 additions & 5 deletions .evergreen/config.yml
@@ -13,15 +13,18 @@ stepback: true
command_type: system

# Protect ourselves against a rogue test case, or curl gone wild, that runs forever
# 12 minutes is the longest we'll ever run
exec_timeout_secs: 3600 # 12 minutes is the longest we'll ever run
# 60 minutes is the longest we'll ever run
exec_timeout_secs: 3600 # 1 hour total for security-focused fuzzing

# What to do when evergreen hits the timeout (`post:` tasks are run automatically)
timeout:
  - command: shell.exec
    params:
      script: |
        ls -la
        echo "Fuzzing timed out. Collecting any available artifacts..."
        if [ -d "src/fuzz/artifacts" ]; then
          tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/
        fi
functions:
  "fetch source":
@@ -154,7 +157,25 @@ functions:
    - command: shell.exec
      params:
        script: |
          # Nothing needs to be done here
          # Archive crash artifacts if they exist and contain crashes
          if [ -d "src/fuzz/artifacts" ] && [ "$(ls -A src/fuzz/artifacts)" ]; then
            echo "Creating artifacts archive..."
            tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/
          else
            echo "No crashes found in artifacts directory. Skipping archive creation."
          fi
    # Upload crash artifacts if they exist
    - command: s3.put
      params:
        aws_key: ${aws_key}
        aws_secret: ${aws_secret}
        local_file: ${PROJECT_DIRECTORY}/crash-artifacts.tar.gz
        remote_file: ${CURRENT_VERSION}/crash-artifacts.tar.gz
        bucket: mciuploads
        permissions: public-read
        content_type: application/x-gzip
        optional: true

pre:
  - func: "fetch source"
  - func: "install dependencies"
@@ -259,4 +280,4 @@ buildvariants:
    run_on:
      - ubuntu1804-test
    tasks:
      - name: "wasm-test"
37 changes: 33 additions & 4 deletions .evergreen/run-fuzzer.sh
@@ -6,7 +6,36 @@ set -o errexit

cd fuzz

# each runs for a minute
cargo +nightly fuzz run deserialize -- -rss_limit_mb=4096 -max_total_time=60
cargo +nightly fuzz run raw_deserialize -- -rss_limit_mb=4096 -max_total_time=60
cargo +nightly fuzz run iterate -- -rss_limit_mb=4096 -max_total_time=60
# Create directories for crashes and corpus
mkdir -p artifacts
mkdir -p corpus

# Generate initial corpus if directory is empty
if [ -z "$(ls -A corpus)" ]; then
  echo "Generating initial corpus..."
  cargo run --bin generate_corpus
fi

# Function to run fuzzer and collect crashes
run_fuzzer() {
  target=$1
  echo "Running fuzzer for $target"
  # Run fuzzer and redirect crashes to artifacts directory
  RUST_BACKTRACE=1 cargo +nightly fuzz run $target -- \
    -rss_limit_mb=4096 \
    -max_total_time=60 \
    -artifact_prefix=artifacts/ \
    -print_final_stats=1 \
    corpus/
}

# Run existing targets
run_fuzzer "deserialize"
run_fuzzer "raw_deserialize"
run_fuzzer "iterate"

# Run new security-focused targets
run_fuzzer "malformed_length"
run_fuzzer "type_markers"
run_fuzzer "string_handling"
run_fuzzer "serialization"
30 changes: 28 additions & 2 deletions fuzz/Cargo.toml
@@ -1,19 +1,25 @@

[package]
name = "bson-fuzz"
version = "0.0.1"
authors = ["Automatically generated"]
publish = false
edition = "2021"

[package.metadata]
cargo-fuzz = true

[dependencies.bson]
path = ".."

[dependencies.libfuzzer-sys]
version = "0.4.0"

[dependencies.serde]
version = "1.0"

[dependencies.serde_json]
version = "1.0"

# Prevent this from interfering with workspaces
[workspace]
members = ["."]

@@ -32,3 +38,23 @@ path = "fuzz_targets/raw_deserialize.rs"
[[bin]]
name = "raw_deserialize_utf8_lossy"
path = "fuzz_targets/raw_deserialize_utf8_lossy.rs"

[[bin]]
name = "malformed_length"
path = "fuzz_targets/malformed_length.rs"

[[bin]]
name = "type_markers"
path = "fuzz_targets/type_markers.rs"

[[bin]]
name = "string_handling"
path = "fuzz_targets/string_handling.rs"

[[bin]]
name = "serialization"
path = "fuzz_targets/serialization.rs"

[[bin]]
name = "generate_corpus"
path = "generate_corpus.rs"
55 changes: 55 additions & 0 deletions fuzz/fuzz_targets/serialization.rs
@@ -0,0 +1,55 @@
#![no_main]
use bson::{
    raw::{RawDocument, RawDocumentBuf},
    Bson,
    Document,
};
use libfuzzer_sys::fuzz_target;

fn compare_docs(doc1: &Document, doc2: &Document) -> bool {
    if doc1.len() != doc2.len() {
        return false;
    }
    for (key, value) in doc1 {
        if !doc2.contains_key(key) {
            return false;
        }
        if let Some(val2) = doc2.get(key) {
            match (value, val2) {
                (Bson::Double(d1), Bson::Double(d2)) => {
                    if (!d1.is_nan() || !d2.is_nan()) && d1 != d2 {
                        return false;
                    }
                }
                (v1, v2) => {
                    if v1 != v2 {
                        return false;
                    }
                }
            }
        }
    }
    true
}

fuzz_target!(|input: &[u8]| {
    if let Ok(rawdoc) = RawDocument::from_bytes(&input) {
        if let Ok(doc) = Document::try_from(rawdoc) {
            let out = RawDocumentBuf::try_from(&doc).unwrap();
            let out_bytes = out.as_bytes();
            if input != out_bytes {
                let reserialized = RawDocument::from_bytes(&out_bytes).unwrap();
                let reserialized_doc = Document::try_from(reserialized).unwrap();
                // Ensure that the reserialized document is the same as the
                // original document; the bytes can differ while still resulting
                // in the same Document.
                if !compare_docs(&doc, &reserialized_doc) {
                    panic!(
                        "Reserialized document is not the same as the original document: {:?} != \
                         {:?}",
                        doc, reserialized_doc
                    );
                }
            }
        }
    }
});
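
compare_docs exists because plain Document equality would misreport NaN doubles: under IEEE 754, NaN compares unequal to everything, including itself, so a round-tripped document containing a NaN would trip a naive equality check even though the decoded value is the same. In short:

// Why compare_docs special-cases Bson::Double: NaN != NaN under IEEE 754.
assert!(f64::NAN != f64::NAN);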
23 changes: 23 additions & 0 deletions fuzz/fuzz_targets/string_handling.rs
@@ -0,0 +1,23 @@
#![no_main]
#[macro_use]
extern crate libfuzzer_sys;
extern crate bson;
use bson::{RawBsonRef, RawDocument};
use std::convert::TryInto;

fuzz_target!(|buf: &[u8]| {
    if let Ok(doc) = RawDocument::from_bytes(buf) {
        for elem in doc.iter_elements().flatten() {
            // Convert to RawBsonRef and check string-related types
            if let Ok(bson) = elem.try_into() {
                match bson {
                    RawBsonRef::String(s) => {
                        let _ = s.len();
                        let _ = s.chars().count();
                    }
                    _ => {}
                }
            }
        }
    }
});
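
Calling both s.len() and s.chars().count() is deliberate: len() reports the byte length in O(1), while chars().count() walks every UTF-8 code point, forcing a full decode of whatever string data survived parsing.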
14 changes: 14 additions & 0 deletions fuzz/fuzz_targets/type_markers.rs
@@ -0,0 +1,14 @@
#![no_main]
#[macro_use]
extern crate libfuzzer_sys;
extern crate bson;
use bson::{RawBsonRef, RawDocument};
use std::convert::TryInto;

fuzz_target!(|buf: &[u8]| {
    if let Ok(doc) = RawDocument::from_bytes(buf) {
        for elem in doc.iter_elements().flatten() {
            let _: Result<RawBsonRef, _> = elem.try_into();
        }
    }
});
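
The try_into here converts a raw element into a RawBsonRef, which is where the element's type byte is actually interpreted; iter_elements().flatten() drops elements that fail structural parsing so that type-marker decoding is still exercised on the rest.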
143 changes: 143 additions & 0 deletions fuzz/generate_corpus.rs
@@ -0,0 +1,143 @@
use bson::{doc, Bson, Decimal128};
use std::{
    fs,
    io::{Error, ErrorKind},
    path::Path,
    str::FromStr,
};

fn main() -> std::io::Result<()> {
    // Relative to the fuzz/ directory, which is where run-fuzzer.sh invokes
    // this binary (it runs `cargo run --bin generate_corpus` after `cd fuzz`).
    let corpus_dir = Path::new("corpus");
    fs::create_dir_all(corpus_dir)?;

    // Generate edge cases for each fuzz target
    generate_length_edge_cases(corpus_dir)?;
    generate_type_marker_cases(corpus_dir)?;
    generate_string_edge_cases(corpus_dir)?;
    generate_serialization_cases(corpus_dir)?;
    Ok(())
}

fn generate_length_edge_cases(dir: &Path) -> std::io::Result<()> {
    let target_dir = dir.join("malformed_length");
    fs::create_dir_all(&target_dir)?;

    // Invalid length
    fs::write(target_dir.join("invalid_len"), vec![4, 5])?;

    // Minimal valid document
    let min_doc = doc! {};
    fs::write(
        target_dir.join("min_doc"),
        bson::to_vec(&min_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
    )?;

    // Document with length near i32::MAX
    let large_doc = doc! { "a": "b".repeat(i32::MAX as usize / 2) };
    fs::write(
        target_dir.join("large_doc"),
        bson::to_vec(&large_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
    )?;

    Ok(())
}

fn generate_type_marker_cases(dir: &Path) -> std::io::Result<()> {
    let target_dir = dir.join("type_markers");
    fs::create_dir_all(&target_dir)?;

    // Document with all BSON types
    let all_types = doc! {
        "double": 1.0f64,
        "double_nan": f64::NAN,
        "double_infinity": f64::INFINITY,
        "double_neg_infinity": f64::NEG_INFINITY,
        "string": "test",
        "document": doc! {},
        "array": vec![1, 2, 3],
        "binary": Bson::Binary(bson::Binary { subtype: bson::spec::BinarySubtype::Generic, bytes: vec![1, 2, 3] }),
        "object_id": bson::oid::ObjectId::new(),
        "bool": true,
        "date": bson::DateTime::now(),
        "null": Bson::Null,
        "regex": Bson::RegularExpression(bson::Regex { pattern: "pattern".into(), options: "i".into() }),
        "int32": 123i32,
        "timestamp": bson::Timestamp { time: 12345, increment: 1 },
        "int64": 123i64,
        "decimal128_nan": Decimal128::from_str("NaN").unwrap(),
        "decimal128_infinity": Decimal128::from_str("Infinity").unwrap(),
        "decimal128_neg_infinity": Decimal128::from_str("-Infinity").unwrap(),
        "min_key": Bson::MinKey,
        "max_key": Bson::MaxKey,
        "undefined": Bson::Undefined
    };
    fs::write(
        target_dir.join("all_types"),
        bson::to_vec(&all_types).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
    )?;

    Ok(())
}

fn generate_string_edge_cases(dir: &Path) -> std::io::Result<()> {
    let target_dir = dir.join("string_handling");
    fs::create_dir_all(&target_dir)?;

    // UTF-8 edge cases
    let utf8_cases = doc! {
        "empty": "",
        "null_bytes": "hello\0world",
        "unicode": "🦀💻🔒",
        "high_surrogate": "\u{10000}",
        "invalid_continuation": Bson::Binary(bson::Binary {
            subtype: bson::spec::BinarySubtype::Generic,
            bytes: vec![0x80u8, 0x80u8, 0x80u8]
        }),
        "overlong": Bson::Binary(bson::Binary {
            subtype: bson::spec::BinarySubtype::Generic,
            bytes: vec![0xC0u8, 0x80u8]
        })
    };
    fs::write(
        target_dir.join("utf8_cases"),
        bson::to_vec(&utf8_cases).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
    )?;

    Ok(())
}

fn generate_serialization_cases(dir: &Path) -> std::io::Result<()> {
    let target_dir = dir.join("serialization");
    fs::create_dir_all(&target_dir)?;

    // Deeply nested document
    let mut nested_doc = doc! {};
    let mut current = &mut nested_doc;
    for i in 0..100 {
        let next_doc = doc! {};
        current.insert(i.to_string(), next_doc);
        current = current
            .get_mut(&i.to_string())
            .unwrap()
            .as_document_mut()
            .unwrap();
    }
    fs::write(
        target_dir.join("nested_doc"),
        bson::to_vec(&nested_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
    )?;

    // Document with large binary data
    let large_binary = doc! {
        "binary": Bson::Binary(bson::Binary {
            subtype: bson::spec::BinarySubtype::Generic,
            bytes: vec![0xFF; 1024 * 1024] // 1MB of data
        })
    };
    fs::write(
        target_dir.join("large_binary"),
        bson::to_vec(&large_binary).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
    )?;

    Ok(())
}
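
run-fuzzer.sh invokes this generator (cargo run --bin generate_corpus) only when the corpus directory is empty, so the seed inputs are created once per fresh checkout and then grown by whatever the fuzzers discover. All targets are pointed at the top-level corpus/ directory; since libFuzzer also loads corpus files from subdirectories, every target ends up seeded with every case generated here.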