From f684d710d7fa967312bef55e2389ab4e88a73bce Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Tue, 29 Oct 2024 17:46:19 +0000 Subject: [PATCH 1/3] OSS-Fuzz: Add new fuzzer upstream Signed-off-by: Arthur Chan --- fuzz/Cargo.toml | 6 +++ fuzz/fuzz_targets/process.rs | 74 ++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 fuzz/fuzz_targets/process.rs diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 45f1a65..1605324 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -23,5 +23,11 @@ path = "fuzz_targets/streaming.rs" test = false doc = false +[[bin]] +name = "process" +path = "fuzz_targets/process.rs" +test = false +doc = false + # Work around https://github.com/rust-lang/cargo/issues/8338 [workspace] diff --git a/fuzz/fuzz_targets/process.rs b/fuzz/fuzz_targets/process.rs new file mode 100644 index 0000000..97a12b5 --- /dev/null +++ b/fuzz/fuzz_targets/process.rs @@ -0,0 +1,74 @@ +// This fuzzing harness fuzz-tests some of the +// Unicode string normalization processing + +#![no_main] + +#[macro_use] +extern crate libfuzzer_sys; +extern crate unicode_normalization; + +use unicode_normalization::{ + char::{compose, canonical_combining_class, is_combining_mark, decompose_canonical, decompose_compatible}, + UnicodeNormalization, +}; + +fuzz_target!(|data: &[u8]| { + let mut data = data; + let c = if let Some((char_value, remaining_data)) = data.split_first() { + match std::char::from_u32(*char_value as u32) { + Some(ch) => { + data = remaining_data; + ch + } + None => return, + } + } else { + return; + }; + + // Generate second character for fuzzing if data is enough + let c2 = if let Some((char_value, remaining_data)) = data.split_first() { + data = remaining_data; + std::char::from_u32(*char_value as u32) + } else { + None + }; + let string_data: String = data.iter().filter_map(|&b| std::char::from_u32(b as u32)).collect(); + + // Randomly choose a function target + match data.first().map(|&b| b % 10) { + Some(0) => { +
if let Some(c2) = c2 { + let _ = compose(c, c2); + } + } + Some(1) => { + let _ = canonical_combining_class(c); + } + Some(2) => { + let _ = is_combining_mark(c); + } + Some(3) => { + let _ = string_data.nfc().collect::<String>(); + } + Some(4) => { + let _ = string_data.nfkd().collect::<String>(); + } + Some(5) => { + let _ = string_data.nfd().collect::<String>(); + } + Some(6) => { + let _ = string_data.nfkc().collect::<String>(); + } + Some(7) => { + let _ = string_data.stream_safe().collect::<String>(); + } + Some(8) => { + decompose_canonical(c, |_| {}); + } + Some(9) => { + decompose_compatible(c, |_| {}); + } + _ => {} + } +}); From 7b9eb477b4f3a98347d533341e860e02999d1946 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Wed, 30 Oct 2024 11:20:10 +0000 Subject: [PATCH 2/3] Fix formatting Signed-off-by: Arthur Chan --- fuzz/fuzz_targets/process.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fuzz/fuzz_targets/process.rs b/fuzz/fuzz_targets/process.rs index 97a12b5..7457410 100644 --- a/fuzz/fuzz_targets/process.rs +++ b/fuzz/fuzz_targets/process.rs @@ -8,7 +8,10 @@ extern crate libfuzzer_sys; extern crate unicode_normalization; use unicode_normalization::{ - char::{compose, canonical_combining_class, is_combining_mark, decompose_canonical, decompose_compatible}, + char::{ + canonical_combining_class, compose, decompose_canonical, decompose_compatible, + is_combining_mark, + }, UnicodeNormalization, }; @@ -33,7 +36,10 @@ fuzz_target!(|data: &[u8]| { } else { None }; - let string_data: String = data.iter().filter_map(|&b| std::char::from_u32(b as u32)).collect(); + let string_data: String = data + .iter() + .filter_map(|&b| std::char::from_u32(b as u32)) + .collect(); // Randomly choose a function target match data.first().map(|&b| b % 10) { From ac1c63449a6f50a38dbee38d062ff8afa5edecf3 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Wed, 30 Oct 2024 19:00:21 +0000 Subject: [PATCH 3/3] Fix string creating logic and add comments Signed-off-by: Arthur Chan ---
fuzz/fuzz_targets/process.rs | 82 ++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 42 deletions(-) diff --git a/fuzz/fuzz_targets/process.rs b/fuzz/fuzz_targets/process.rs index 7457410..9ae0a60 100644 --- a/fuzz/fuzz_targets/process.rs +++ b/fuzz/fuzz_targets/process.rs @@ -15,65 +15,63 @@ use unicode_normalization::{ UnicodeNormalization, }; -fuzz_target!(|data: &[u8]| { - let mut data = data; - let c = if let Some((char_value, remaining_data)) = data.split_first() { - match std::char::from_u32(*char_value as u32) { - Some(ch) => { - data = remaining_data; - ch - } - None => return, - } - } else { - return; - }; +fuzz_target!(|data: (u8, String)| { + let (function_index, string_data) = data; - // Generate second character for fuzzing if data is enough - let c2 = if let Some((char_value, remaining_data)) = data.split_first() { - data = remaining_data; - std::char::from_u32(*char_value as u32) - } else { - None - }; - let string_data: String = data - .iter() - .filter_map(|&b| std::char::from_u32(b as u32)) - .collect(); + // Create an iterator for characters + let mut chars = string_data.chars(); - // Randomly choose a function target - match data.first().map(|&b| b % 10) { - Some(0) => { - if let Some(c2) = c2 { - let _ = compose(c, c2); + // Randomly fuzz a target function + match function_index % 10 { + 0 => { + // Fuzz compose with the first two characters + if let (Some(c1), Some(c2)) = (chars.next(), chars.next()) { + let _ = compose(c1, c2); } } - Some(1) => { - let _ = canonical_combining_class(c); + 1 => { + // Fuzz canonical_combining_class + if let Some(c) = chars.next() { + let _ = canonical_combining_class(c); + } } - Some(2) => { - let _ = is_combining_mark(c); + 2 => { + // Fuzz is_combining_mark + if let Some(c) = chars.next() { + let _ = is_combining_mark(c); + } } - Some(3) => { + 3 => { + // Fuzz NFC let _ = string_data.nfc().collect::<String>(); } - Some(4) => { + 4 => { + // Fuzz NFKD let _ = string_data.nfkd().collect::<String>(); } -
Some(5) => { + 5 => { + // Fuzz NFD let _ = string_data.nfd().collect::<String>(); } - Some(6) => { + 6 => { + // Fuzz NFKC let _ = string_data.nfkc().collect::<String>(); } - Some(7) => { + 7 => { + // Fuzz stream_safe let _ = string_data.stream_safe().collect::<String>(); } - Some(8) => { - decompose_canonical(c, |_| {}); + 8 => { + // Fuzz decompose_canonical + if let Some(c) = chars.next() { + decompose_canonical(c, |_| {}); + } } - Some(9) => { - decompose_compatible(c, |_| {}); + 9 => { + // Fuzz decompose_compatible + if let Some(c) = chars.next() { + decompose_compatible(c, |_| {}); + } } _ => {} }