New PR with Devin's complete changes #507

Merged · 11 commits · Dec 17, 2024
31 changes: 26 additions & 5 deletions .evergreen/config.yml
@@ -13,15 +13,18 @@ stepback: true
command_type: system

# Protect ourselves against a rogue test case, or curl gone wild, that runs forever
-# 12 minutes is the longest we'll ever run
-exec_timeout_secs: 3600 # 12 minutes is the longest we'll ever run
+# 60 minutes is the longest we'll ever run
+exec_timeout_secs: 3600 # 1 hour total for security-focused fuzzing

Review comment (Contributor Author): Yes, devin actually updated this comment

# What to do when evergreen hits the timeout (`post:` tasks are run automatically)
timeout:
  - command: shell.exec
    params:
      script: |
        ls -la
        echo "Fuzzing timed out. Collecting any available artifacts..."
        if [ -d "src/fuzz/artifacts" ]; then
          tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/
        fi

functions:
  "fetch source":

@@ -154,7 +157,25 @@ functions:
    - command: shell.exec
      params:
        script: |
Review comment (Contributor Author): These evergreen changes are completely AI written except for the echo statement. Pretty impressive

-          # Nothing needs to be done here
+          # Archive crash artifacts if they exist and contain crashes
+          if [ -d "src/fuzz/artifacts" ] && [ "$(ls -A src/fuzz/artifacts)" ]; then
+            echo "Creating artifacts archive..."
+            tar czf "${PROJECT_DIRECTORY}/crash-artifacts.tar.gz" src/fuzz/artifacts/
+          else
+            echo "No crashes found in artifacts directory. Skipping archive creation."
+          fi
+    # Upload crash artifacts if they exist
+    - command: s3.put
+      params:
+        aws_key: ${aws_key}
+        aws_secret: ${aws_secret}
+        local_file: ${PROJECT_DIRECTORY}/crash-artifacts.tar.gz
+        remote_file: ${CURRENT_VERSION}/crash-artifacts.tar.gz
+        bucket: mciuploads
+        permissions: public-read
+        content_type: application/x-gzip
+        optional: true

pre:
  - func: "fetch source"
  - func: "install dependencies"
@@ -259,4 +280,4 @@ buildvariants:
    run_on:
      - ubuntu1804-test
    tasks:
      - name: "wasm-test"
37 changes: 33 additions & 4 deletions .evergreen/run-fuzzer.sh
@@ -6,7 +6,36 @@ set -o errexit

cd fuzz

-# each runs for a minute
-cargo +nightly fuzz run deserialize -- -rss_limit_mb=4096 -max_total_time=60
-cargo +nightly fuzz run raw_deserialize -- -rss_limit_mb=4096 -max_total_time=60
-cargo +nightly fuzz run iterate -- -rss_limit_mb=4096 -max_total_time=60
+# Create directories for crashes and corpus
+mkdir -p artifacts
+mkdir -p corpus
+
+# Generate initial corpus if directory is empty
+if [ -z "$(ls -A corpus)" ]; then
+    echo "Generating initial corpus..."
+    cargo run --bin generate_corpus
+fi
+
+# Function to run fuzzer and collect crashes
+run_fuzzer() {
+    target=$1
+    echo "Running fuzzer for $target"
+    # Run fuzzer and redirect crashes to artifacts directory
+    RUST_BACKTRACE=1 cargo +nightly fuzz run $target -- \
+        -rss_limit_mb=4096 \
+        -max_total_time=60 \
+        -artifact_prefix=artifacts/ \
+        -print_final_stats=1 \
+        corpus/
+}
+
+# Run existing targets
+run_fuzzer "deserialize"
+run_fuzzer "raw_deserialize"
+run_fuzzer "iterate"
+
+# Run new security-focused targets
+run_fuzzer "malformed_length"
+run_fuzzer "type_markers"
+run_fuzzer "string_handling"
+run_fuzzer "serialization"
30 changes: 28 additions & 2 deletions fuzz/Cargo.toml
@@ -1,19 +1,25 @@

[package]
name = "bson-fuzz"
version = "0.0.1"
authors = ["Automatically generated"]
publish = false
edition = "2021"
Review comment (Contributor): I'd generally prefer to do edition bumps in their own PRs, but it doesn't look like it had any impact here otherwise.

Review comment (Contributor Author): Devin was not pleased that we didn't have an edition 😂

[package.metadata]
cargo-fuzz = true

[dependencies.bson]
path = ".."

[dependencies.libfuzzer-sys]
version = "0.4.0"

# Prevent this from interfering with workspaces
[dependencies.serde]
version = "1.0"

[dependencies.serde_json]
version = "1.0"

[workspace]
members = ["."]

@@ -32,3 +38,23 @@ path = "fuzz_targets/raw_deserialize.rs"
[[bin]]
name = "raw_deserialize_utf8_lossy"
path = "fuzz_targets/raw_deserialize_utf8_lossy.rs"

[[bin]]
name = "malformed_length"
path = "fuzz_targets/malformed_length.rs"

[[bin]]
name = "type_markers"
path = "fuzz_targets/type_markers.rs"

[[bin]]
name = "string_handling"
path = "fuzz_targets/string_handling.rs"

[[bin]]
name = "serialization"
path = "fuzz_targets/serialization.rs"

[[bin]]
name = "generate_corpus"
path = "generate_corpus.rs"
55 changes: 55 additions & 0 deletions fuzz/fuzz_targets/serialization.rs
@@ -0,0 +1,55 @@
#![no_main]
use bson::{
    raw::{RawDocument, RawDocumentBuf},
    Bson,
    Document,
};
use libfuzzer_sys::fuzz_target;

fn compare_docs(doc1: &Document, doc2: &Document) -> bool {
    if doc1.len() != doc2.len() {
        return false;
    }
    for (key, value) in doc1 {
        if !doc2.contains_key(key) {
            return false;
        }
        if let Some(val2) = doc2.get(key) {
            match (value, val2) {
                (Bson::Double(d1), Bson::Double(d2)) => {
                    if (!d1.is_nan() || !d2.is_nan()) && d1 != d2 {
                        return false;
                    }
                }
                (v1, v2) => {
                    if v1 != v2 {
                        return false;
                    }
                }
            }
        }
    }
    true
}

fuzz_target!(|input: &[u8]| {
    if let Ok(rawdoc) = RawDocument::from_bytes(&input) {
        if let Ok(doc) = Document::try_from(rawdoc) {
            let out = RawDocumentBuf::try_from(&doc).unwrap();
            let out_bytes = out.as_bytes();
            if input != out_bytes {
                let reserialized = RawDocument::from_bytes(&out_bytes).unwrap();
                let reserialized_doc = Document::try_from(reserialized).unwrap();
                // Ensure that the reserialized document is the same as the original
                // document; the bytes can differ while still resulting in the same
                // Document.
                if !compare_docs(&doc, &reserialized_doc) {
                    panic!(
                        "Reserialized document is not the same as the original document: {:?} != \
                         {:?}",
                        doc, reserialized_doc
                    );
                }
            }
        }
    }
});
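For context, the invariant this target enforces is semantic round-tripping, not byte equality: re-parsing the reserialized bytes must yield an equal Document even when those bytes differ from the input. Below is a minimal sketch of that round trip; it is not part of the PR, reuses the same bson calls the target exercises, and the document contents are purely illustrative:

use bson::{doc, Document, RawDocument, RawDocumentBuf};

fn main() {
    let original = doc! { "count": 1i32, "ratio": 0.5f64 };
    // Document -> bytes; the fuzz target receives bytes like these as input
    let bytes = bson::to_vec(&original).expect("serialization should succeed");
    // bytes -> &RawDocument -> Document, the same path the target takes
    let raw = RawDocument::from_bytes(bytes.as_slice()).expect("valid BSON");
    let parsed = Document::try_from(raw).expect("conversion should succeed");
    assert_eq!(original, parsed);
    // Document -> RawDocumentBuf -> Document again; semantic equality must
    // hold even if the intermediate bytes differ from the original input
    let reserialized = RawDocumentBuf::try_from(&parsed).expect("round trip");
    let raw2 = RawDocument::from_bytes(reserialized.as_bytes()).expect("valid BSON");
    assert_eq!(parsed, Document::try_from(raw2).unwrap());
}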
23 changes: 23 additions & 0 deletions fuzz/fuzz_targets/string_handling.rs
@@ -0,0 +1,23 @@
#![no_main]
#[macro_use]
extern crate libfuzzer_sys;
extern crate bson;
use bson::{RawBsonRef, RawDocument};
use std::convert::TryInto;

fuzz_target!(|buf: &[u8]| {
    if let Ok(doc) = RawDocument::from_bytes(buf) {
        for elem in doc.iter_elements().flatten() {
            // Convert to RawBsonRef and check string-related types
            if let Ok(bson) = elem.try_into() {
                match bson {
                    RawBsonRef::String(s) => {
                        let _ = s.len();
                        let _ = s.chars().count();
                    }
                    _ => {}
                }
            }
        }
    }
});
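As background for this target: a BSON string is stored as an int32 byte length (counting a trailing NUL) followed by UTF-8 data, which is why mismatched lengths, embedded NULs, and invalid byte sequences are worth fuzzing. A hand-assembled example, not part of the PR, with the layout taken from the BSON spec and the same parsing calls the target uses:

use bson::{RawBsonRef, RawDocument};
use std::convert::TryInto;

fn main() {
    // { "s": "hé" } laid out by hand
    let bytes: &[u8] = &[
        16, 0, 0, 0,            // document byte length, little-endian
        0x02,                   // element type marker: string
        b's', 0,                // key "s", NUL-terminated
        4, 0, 0, 0,             // string byte length: 3 UTF-8 bytes + NUL
        0x68, 0xC3, 0xA9, 0x00, // "hé", NUL-terminated
        0x00,                   // document terminator
    ];
    let doc = RawDocument::from_bytes(bytes).expect("valid BSON");
    for elem in doc.iter_elements().flatten() {
        if let Ok(RawBsonRef::String(s)) = elem.try_into() {
            assert_eq!(s, "hé");
        }
    }
}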
14 changes: 14 additions & 0 deletions fuzz/fuzz_targets/type_markers.rs
@@ -0,0 +1,14 @@
#![no_main]
#[macro_use]
extern crate libfuzzer_sys;
extern crate bson;
use bson::{RawBsonRef, RawDocument};
use std::convert::TryInto;

fuzz_target!(|buf: &[u8]| {
    if let Ok(doc) = RawDocument::from_bytes(buf) {
        for elem in doc.iter_elements().flatten() {
            let _: Result<RawBsonRef, _> = elem.try_into();
        }
    }
});
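For reference, the type marker this target stresses is the single byte that begins each element and tags its value type. A hand-assembled example, not part of the PR (0x10 is the int32 marker per the BSON spec):

use bson::{RawBsonRef, RawDocument};
use std::convert::TryInto;

fn main() {
    // { "a": 1 } laid out by hand
    let bytes: &[u8] = &[
        12, 0, 0, 0, // document byte length, little-endian
        0x10,        // element type marker: int32
        b'a', 0,     // key "a", NUL-terminated
        1, 0, 0, 0,  // value: 1i32
        0x00,        // document terminator
    ];
    let doc = RawDocument::from_bytes(bytes).expect("valid BSON");
    for elem in doc.iter_elements().flatten() {
        let parsed: Result<RawBsonRef, _> = elem.try_into();
        assert!(matches!(parsed, Ok(RawBsonRef::Int32(1))));
    }
}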
143 changes: 143 additions & 0 deletions fuzz/generate_corpus.rs
@@ -0,0 +1,143 @@
use bson::{doc, Bson, Decimal128};
use std::{
    fs,
    io::{Error, ErrorKind},
    path::Path,
    str::FromStr,
};

fn main() -> std::io::Result<()> {
    let corpus_dir = Path::new("fuzz/corpus");
    fs::create_dir_all(corpus_dir)?;

    // Generate edge cases for each fuzz target
    generate_length_edge_cases(corpus_dir)?;
    generate_type_marker_cases(corpus_dir)?;
    generate_string_edge_cases(corpus_dir)?;
    generate_serialization_cases(corpus_dir)?;
    Ok(())
}

fn generate_length_edge_cases(dir: &Path) -> std::io::Result<()> {
    let target_dir = dir.join("malformed_length");
    fs::create_dir_all(&target_dir)?;

    // Invalid length
    fs::write(target_dir.join("invalid_len"), vec![4, 5])?;

    // Minimal valid document
    let min_doc = doc! {};
    fs::write(
        target_dir.join("min_doc"),
        bson::to_vec(&min_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
    )?;

    // Document with length near i32::MAX
    let large_doc = doc! { "a": "b".repeat(i32::MAX as usize / 2) };
    fs::write(
        target_dir.join("large_doc"),
        bson::to_vec(&large_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
    )?;

    Ok(())
}

fn generate_type_marker_cases(dir: &Path) -> std::io::Result<()> {
    let target_dir = dir.join("type_markers");
    fs::create_dir_all(&target_dir)?;

    // Document with all BSON types
    let all_types = doc! {
        "double": 1.0f64,
        "double_nan": f64::NAN,
        "double_infinity": f64::INFINITY,
        "double_neg_infinity": f64::NEG_INFINITY,
        "string": "test",
        "document": doc! {},
        "array": vec![1, 2, 3],
        "binary": Bson::Binary(bson::Binary { subtype: bson::spec::BinarySubtype::Generic, bytes: vec![1, 2, 3] }),
        "object_id": bson::oid::ObjectId::new(),
        "bool": true,
        "date": bson::DateTime::now(),
        "null": Bson::Null,
        "regex": Bson::RegularExpression(bson::Regex { pattern: "pattern".into(), options: "i".into() }),
        "int32": 123i32,
        "timestamp": bson::Timestamp { time: 12345, increment: 1 },
        "int64": 123i64,
        "decimal128_nan": Decimal128::from_str("NaN").unwrap(),
        "decimal128_infinity": Decimal128::from_str("Infinity").unwrap(),
        "decimal128_neg_infinity": Decimal128::from_str("-Infinity").unwrap(),
        "min_key": Bson::MinKey,
        "max_key": Bson::MaxKey,
        "undefined": Bson::Undefined
    };
    fs::write(
        target_dir.join("all_types"),
        bson::to_vec(&all_types).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
    )?;

    Ok(())
}

fn generate_string_edge_cases(dir: &Path) -> std::io::Result<()> {
    let target_dir = dir.join("string_handling");
    fs::create_dir_all(&target_dir)?;

    // UTF-8 edge cases
    let utf8_cases = doc! {
        "empty": "",
        "null_bytes": "hello\0world",
        "unicode": "🦀💻🔒",
        // Review comment (Contributor Author): Devin seems to have developed a sense of humor 😂
        "high_surrogate": "\u{10000}",
        "invalid_continuation": Bson::Binary(bson::Binary {
            subtype: bson::spec::BinarySubtype::Generic,
            bytes: vec![0x80u8, 0x80u8, 0x80u8]
        }),
        "overlong": Bson::Binary(bson::Binary {
            subtype: bson::spec::BinarySubtype::Generic,
            bytes: vec![0xC0u8, 0x80u8]
        })
    };
    fs::write(
        target_dir.join("utf8_cases"),
        bson::to_vec(&utf8_cases).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
    )?;

    Ok(())
}

fn generate_serialization_cases(dir: &Path) -> std::io::Result<()> {
    let target_dir = dir.join("serialization");
    fs::create_dir_all(&target_dir)?;

    // Deeply nested document
    let mut nested_doc = doc! {};
    let mut current = &mut nested_doc;
    for i in 0..100 {
        let next_doc = doc! {};
        current.insert(i.to_string(), next_doc);
        current = current
            .get_mut(&i.to_string())
            .unwrap()
            .as_document_mut()
            .unwrap();
    }
    fs::write(
        target_dir.join("nested_doc"),
        bson::to_vec(&nested_doc).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
    )?;

    // Document with large binary data
    let large_binary = doc! {
        "binary": Bson::Binary(bson::Binary {
            subtype: bson::spec::BinarySubtype::Generic,
            bytes: vec![0xFF; 1024 * 1024] // 1MB of data
        })
    };
    fs::write(
        target_dir.join("large_binary"),
        bson::to_vec(&large_binary).map_err(|e| Error::new(ErrorKind::Other, e.to_string()))?,
    )?;

    Ok(())
}