From 837ccb1c2e1e12eb62ce41ec529c36ebe19bcca5 Mon Sep 17 00:00:00 2001 From: Nathan Fiedler Date: Wed, 25 Jan 2023 22:02:44 -0800 Subject: [PATCH] test: add more normalization tests --- src/lib.rs | 7 +++---- src/v2016/mod.rs | 25 +++++++++++++++++++++++++ src/v2020/mod.rs | 25 +++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4c0cfd6..3c36317 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -104,10 +104,9 @@ //! chunk sizes in order to improve the overall deduplication ratio. //! //! Note that changing the minimum chunk size will almost certainly result in -//! different cut points. This is due to the "randomness" of the gear hash and -//! the process of calculating the fingerprint of the sliding window. It is best -//! to pick a minimum chunk size for your application that can remain relevant -//! indefinitely, lest you produce different sets of chunks for the same data. +//! different cut points. It is best to pick a minimum chunk size for your +//! application that can remain relevant indefinitely, lest you produce +//! different sets of chunks for the same data. //! //! Similarly, setting the maximum chunk size to be too small may result in cut //! points that were determined by the maximum size rather than the data itself. diff --git a/src/v2016/mod.rs b/src/v2016/mod.rs index 7bf63a0..c19bf83 100644 --- a/src/v2016/mod.rs +++ b/src/v2016/mod.rs @@ -575,6 +575,31 @@ mod tests { assert_eq!(results[4].length, 24700); } + #[test] + fn test_cut_sekien_16k_nc_0() { + let read_result = fs::read("test/fixtures/SekienAkashita.jpg"); + assert!(read_result.is_ok()); + let contents = read_result.unwrap(); + let chunker = FastCDC::with_level(&contents, 4096, 16384, 65535, Normalization::Level0); + let mut cursor: u64 = 0; + let mut remaining: u64 = contents.len() as u64; + let expected: Vec<(u64, u64)> = vec![ + (221561130519947581, 6634), + (15733367461443853673, 59915), + (10460176299449652894, 25597), + (6197802202431009942, 5237), + (2504464741100432583, 12083), + ]; + for (e_hash, e_length) in expected.iter() { + let (hash, pos) = chunker.cut(cursor, remaining); + assert_eq!(hash, *e_hash); + assert_eq!(pos, cursor + e_length); + cursor = pos; + remaining -= e_length; + } + assert_eq!(remaining, 0); + } + #[test] fn test_cut_sekien_16k_nc_3() { let read_result = fs::read("test/fixtures/SekienAkashita.jpg"); diff --git a/src/v2020/mod.rs b/src/v2020/mod.rs index 6aca4d2..71c8c3d 100644 --- a/src/v2020/mod.rs +++ b/src/v2020/mod.rs @@ -657,6 +657,31 @@ mod tests { assert_eq!(results[4].length, 24700); } + #[test] + fn test_cut_sekien_16k_nc_0() { + let read_result = fs::read("test/fixtures/SekienAkashita.jpg"); + assert!(read_result.is_ok()); + let contents = read_result.unwrap(); + let chunker = FastCDC::with_level(&contents, 4096, 16384, 65535, Normalization::Level0); + let mut cursor: u64 = 0; + let mut remaining: u64 = contents.len() as u64; + let expected: Vec<(u64, u64)> = vec![ + (443122261039895162, 6634), + (15733367461443853673, 59915), + (10460176299449652894, 25597), + (6197802202431009942, 5237), + (6321136627705800457, 12083), + ]; + for (e_hash, e_length) in expected.iter() { + let (hash, pos) = chunker.cut(cursor, remaining); + assert_eq!(hash, *e_hash); + assert_eq!(pos, cursor + e_length); + cursor = pos; + remaining -= e_length; + } + assert_eq!(remaining, 0); + } + #[test] fn test_cut_sekien_16k_nc_3() { let read_result = fs::read("test/fixtures/SekienAkashita.jpg");