Skip to content

Commit

Permalink
minor: remove useless Box from streaming chunkers
Browse files Browse the repository at this point in the history
As tazjin points out in issue #25, the use of `Box<dyn Read>` is unnecessary for
the streaming chunkers.

Fixes #25

cargo test passes
  • Loading branch information
nlfiedler committed Mar 10, 2023
1 parent 98df8a7 commit 3b5ede8
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 25 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ This project adheres to [Semantic Versioning](http://semver.org/).
This file follows the convention described at
[Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

## [Unreleased]
### Changed
- Removed unnecessary use of `Box` from `StreamCDC` in `v2016` and `v2020`.

## [3.0.1] - 2023-02-28
### Added
- nagy: Support conversion to `std::io::Error` in streaming chunkers.
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ for chunk in chunker {

### Streaming

Both the `v2016` and `v2020` modules have a streaming version of FastCDC named `StreamCDC`, which takes a boxed `Read` and uses a byte vector with capacity equal to the specified maximum chunk size.
Both the `v2016` and `v2020` modules have a streaming version of FastCDC named `StreamCDC`, which takes a `Read` and uses a byte vector with capacity equal to the specified maximum chunk size.

```rust
let source = std::fs::File::open("test/fixtures/SekienAkashita.jpg").unwrap();
let chunker = fastcdc::v2020::StreamCDC::new(Box::new(source), 4096, 16384, 65535);
let chunker = fastcdc::v2020::StreamCDC::new(source, 4096, 16384, 65535);
for result in chunker {
let chunk = result.unwrap();
println!("offset={} length={}", chunk.offset, chunk.length);
Expand Down
2 changes: 1 addition & 1 deletion examples/stream2016.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ fn main() {
let file = File::open(filename).expect("cannot open file!");
let min_size = avg_size / 4;
let max_size = avg_size * 4;
let chunker = StreamCDC::new(Box::new(file), min_size, avg_size, max_size);
let chunker = StreamCDC::new(file, min_size, avg_size, max_size);
for result in chunker {
let entry = result.expect("failed to read chunk");
println!(
Expand Down
2 changes: 1 addition & 1 deletion examples/stream2020.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ fn main() {
let file = File::open(filename).expect("cannot open file!");
let min_size = avg_size / 4;
let max_size = avg_size * 4;
let chunker = StreamCDC::new(Box::new(file), min_size, avg_size, max_size);
let chunker = StreamCDC::new(file, min_size, avg_size, max_size);
for result in chunker {
let entry = result.expect("failed to read chunk");
println!(
Expand Down
20 changes: 10 additions & 10 deletions src/v2016/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
//! record keeping.
//!
//! The `StreamCDC` implementation is similar to `FastCDC` except that it will
//! read data from a boxed `Read` into an internal buffer of `max_size` and
//! produce `ChunkData` values from the `Iterator`.
//! read data from a `Read` into an internal buffer of `max_size` and produce
//! `ChunkData` values from the `Iterator`.
use std::fmt;
use std::io::Read;

Expand Down Expand Up @@ -456,22 +456,22 @@ pub struct ChunkData {
/// # use std::fs::File;
/// # use fastcdc::v2016::StreamCDC;
/// let source = File::open("test/fixtures/SekienAkashita.jpg").unwrap();
/// let chunker = StreamCDC::new(Box::new(source), 4096, 16384, 65535);
/// let chunker = StreamCDC::new(source, 4096, 16384, 65535);
/// for result in chunker {
/// let chunk = result.unwrap();
/// println!("offset={} length={}", chunk.offset, chunk.length);
/// }
/// ```
///
pub struct StreamCDC {
pub struct StreamCDC<R: Read> {
/// Buffer of data from source for finding cut points.
buffer: Vec<u8>,
/// Maximum capacity of the buffer (always `max_size`).
capacity: usize,
/// Number of relevant bytes in the `buffer`.
length: usize,
/// Source from which data is read into `buffer`.
source: Box<dyn Read>,
source: R,
/// Number of bytes read from the source so far.
processed: u64,
/// True when the source produces no more data.
Expand All @@ -483,21 +483,21 @@ pub struct StreamCDC {
mask_l: u64,
}

impl StreamCDC {
impl<R: Read> StreamCDC<R> {
///
/// Construct a `StreamCDC` that will process bytes from the given source.
///
/// Uses chunk size normalization level 1 by default.
///
pub fn new(source: Box<dyn Read>, min_size: u32, avg_size: u32, max_size: u32) -> Self {
pub fn new(source: R, min_size: u32, avg_size: u32, max_size: u32) -> Self {
StreamCDC::with_level(source, min_size, avg_size, max_size, Normalization::Level1)
}

///
/// Create a new `StreamCDC` with the given normalization level.
///
pub fn with_level(
source: Box<dyn Read>,
source: R,
min_size: u32,
avg_size: u32,
max_size: u32,
Expand Down Expand Up @@ -598,7 +598,7 @@ impl StreamCDC {
}
}

impl Iterator for StreamCDC {
impl<R: Read> Iterator for StreamCDC<R> {
type Item = Result<ChunkData, Error>;

fn next(&mut self) -> Option<Result<ChunkData, Error>> {
Expand Down Expand Up @@ -955,7 +955,7 @@ mod tests {
digest: "1aa7ad95f274d6ba34a983946ebc5af3".into(),
},
];
let chunker = StreamCDC::new(Box::new(file), 4096, 16384, 65535);
let chunker = StreamCDC::new(file, 4096, 16384, 65535);
let mut index = 0;
for result in chunker {
assert!(result.is_ok());
Expand Down
22 changes: 11 additions & 11 deletions src/v2020/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
//! `hash` field of the `Chunk` struct. While this value has rather low entropy,
//! it is computationally cost-free and can be put to some use with additional
//! record keeping.
//!
//!
//! The `StreamCDC` implementation is similar to `FastCDC` except that it will
//! read data from a boxed `Read` into an internal buffer of `max_size` and
//! produce `ChunkData` values from the `Iterator`.
//! read data from a `Read` into an internal buffer of `max_size` and produce
//! `ChunkData` values from the `Iterator`.
use std::fmt;
use std::io::Read;

Expand Down Expand Up @@ -544,22 +544,22 @@ pub struct ChunkData {
/// # use std::fs::File;
/// # use fastcdc::v2020::StreamCDC;
/// let source = File::open("test/fixtures/SekienAkashita.jpg").unwrap();
/// let chunker = StreamCDC::new(Box::new(source), 4096, 16384, 65535);
/// let chunker = StreamCDC::new(source, 4096, 16384, 65535);
/// for result in chunker {
/// let chunk = result.unwrap();
/// println!("offset={} length={}", chunk.offset, chunk.length);
/// }
/// ```
///
pub struct StreamCDC {
pub struct StreamCDC<R: Read> {
/// Buffer of data from source for finding cut points.
buffer: Vec<u8>,
/// Maximum capacity of the buffer (always `max_size`).
capacity: usize,
/// Number of relevant bytes in the `buffer`.
length: usize,
/// Source from which data is read into `buffer`.
source: Box<dyn Read>,
source: R,
/// Number of bytes read from the source so far.
processed: u64,
/// True when the source produces no more data.
Expand All @@ -573,21 +573,21 @@ pub struct StreamCDC {
mask_l_ls: u64,
}

impl StreamCDC {
impl<R: Read> StreamCDC<R> {
///
/// Construct a `StreamCDC` that will process bytes from the given source.
///
/// Uses chunk size normalization level 1 by default.
///
pub fn new(source: Box<dyn Read>, min_size: u32, avg_size: u32, max_size: u32) -> Self {
pub fn new(source: R, min_size: u32, avg_size: u32, max_size: u32) -> Self {
StreamCDC::with_level(source, min_size, avg_size, max_size, Normalization::Level1)
}

///
/// Create a new `StreamCDC` with the given normalization level.
///
pub fn with_level(
source: Box<dyn Read>,
source: R,
min_size: u32,
avg_size: u32,
max_size: u32,
Expand Down Expand Up @@ -692,7 +692,7 @@ impl StreamCDC {
}
}

impl Iterator for StreamCDC {
impl<R: Read> Iterator for StreamCDC<R> {
type Item = Result<ChunkData, Error>;

fn next(&mut self) -> Option<Result<ChunkData, Error>> {
Expand Down Expand Up @@ -1049,7 +1049,7 @@ mod tests {
digest: "1aa7ad95f274d6ba34a983946ebc5af3".into(),
},
];
let chunker = StreamCDC::new(Box::new(file), 4096, 16384, 65535);
let chunker = StreamCDC::new(file, 4096, 16384, 65535);
let mut index = 0;
for result in chunker {
assert!(result.is_ok());
Expand Down

0 comments on commit 3b5ede8

Please sign in to comment.