diff --git a/.gitattributes b/.gitattributes index 26a28b82f..84d8f7d22 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,3 @@ tests/data/** binary=true tests/data/**/LICENSE.*.txt binary=false -fuzz/corpus/** binary=true \ No newline at end of file +fuzz/corpus/** binary=true diff --git a/Cargo.toml b/Cargo.toml index 173f0a231..c1f0b36f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,7 @@ zstd = { version = "0.13", optional = true, default-features = false } zopfli = { version = "0.8", optional = true } deflate64 = { version = "0.1.9", optional = true } lzma-rs = { version = "0.3", default-features = false, optional = true } +bitstream-io = { version = "2.3.0", optional = true } [target.'cfg(any(all(target_arch = "arm", target_pointer_width = "32"), target_arch = "mips", target_arch = "powerpc"))'.dependencies] crossbeam-utils = "0.8.20" @@ -77,6 +78,7 @@ deflate-zlib = ["flate2/zlib", "deflate-flate2"] deflate-zlib-ng = ["flate2/zlib-ng", "deflate-flate2"] deflate-zopfli = ["zopfli", "_deflate-any"] lzma = ["lzma-rs/stream"] +legacy-zip = ["bitstream-io"] unreserved = [] xz = ["lzma-rs/raw_decoder"] default = [ @@ -87,6 +89,7 @@ default = [ "lzma", "time", "zstd", + "legacy-zip", "xz", ] diff --git a/src/compression.rs b/src/compression.rs index 83a7669bd..7dbe4a099 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -38,6 +38,15 @@ pub enum CompressionMethod { /// Compress the file using LZMA #[cfg(feature = "lzma")] Lzma, + /// Legacy format + #[cfg(feature = "legacy-zip")] + Shrink, + /// Reduce (Method 2-5) + #[cfg(feature = "legacy-zip")] + Reduce(u8), + /// Method 6 Implode/explode + #[cfg(feature = "legacy-zip")] + Implode, /// Compress the file using XZ #[cfg(feature = "xz")] Xz, @@ -52,12 +61,18 @@ pub enum CompressionMethod { /// All compression methods defined for the ZIP format impl CompressionMethod { pub const STORE: Self = CompressionMethod::Stored; - pub const SHRINK: Self = CompressionMethod::Unsupported(1); - pub const 
REDUCE_1: Self = CompressionMethod::Unsupported(2); - pub const REDUCE_2: Self = CompressionMethod::Unsupported(3); - pub const REDUCE_3: Self = CompressionMethod::Unsupported(4); - pub const REDUCE_4: Self = CompressionMethod::Unsupported(5); - pub const IMPLODE: Self = CompressionMethod::Unsupported(6); + #[cfg(feature = "legacy-zip")] + pub const SHRINK: Self = CompressionMethod::Shrink; + #[cfg(feature = "legacy-zip")] + pub const REDUCE_1: Self = CompressionMethod::Reduce(1); + #[cfg(feature = "legacy-zip")] + pub const REDUCE_2: Self = CompressionMethod::Reduce(2); + #[cfg(feature = "legacy-zip")] + pub const REDUCE_3: Self = CompressionMethod::Reduce(3); + #[cfg(feature = "legacy-zip")] + pub const REDUCE_4: Self = CompressionMethod::Reduce(4); + #[cfg(feature = "legacy-zip")] + pub const IMPLODE: Self = CompressionMethod::Implode; #[cfg(feature = "_deflate-any")] pub const DEFLATE: Self = CompressionMethod::Deflated; #[cfg(not(feature = "_deflate-any"))] @@ -99,6 +114,18 @@ impl CompressionMethod { pub(crate) const fn parse_from_u16(val: u16) -> Self { match val { 0 => CompressionMethod::Stored, + #[cfg(feature = "legacy-zip")] + 1 => CompressionMethod::Shrink, + #[cfg(feature = "legacy-zip")] + 2 => CompressionMethod::Reduce(1), + #[cfg(feature = "legacy-zip")] + 3 => CompressionMethod::Reduce(2), + #[cfg(feature = "legacy-zip")] + 4 => CompressionMethod::Reduce(3), + #[cfg(feature = "legacy-zip")] + 5 => CompressionMethod::Reduce(4), + #[cfg(feature = "legacy-zip")] + 6 => CompressionMethod::Implode, #[cfg(feature = "_deflate-any")] 8 => CompressionMethod::Deflated, #[cfg(feature = "deflate64")] @@ -130,6 +157,13 @@ impl CompressionMethod { pub(crate) const fn serialize_to_u16(self) -> u16 { match self { CompressionMethod::Stored => 0, + #[cfg(feature = "legacy-zip")] + CompressionMethod::Shrink => 1, + #[cfg(feature = "legacy-zip")] + CompressionMethod::Reduce(n) => 1 + n as u16, + #[cfg(feature = "legacy-zip")] + CompressionMethod::Implode => 6, + 
#[cfg(feature = "_deflate-any")] CompressionMethod::Deflated => 8, #[cfg(feature = "deflate64")] diff --git a/src/legacy/huffman.rs b/src/legacy/huffman.rs new file mode 100644 index 000000000..6db9fd938 --- /dev/null +++ b/src/legacy/huffman.rs @@ -0,0 +1,260 @@ +use std::io::{self, Error, Seek}; + +use bitstream_io::{BitRead, BitReader, Endianness}; + +#[derive(Default, Clone, Copy)] +pub struct TableEntry { + /// Wide enough to fit the max symbol nbr. + pub sym: u16, + /// 0 means no symbol. + pub len: u8, +} + +/// Deflate uses max 288 symbols. +const MAX_HUFFMAN_SYMBOLS: usize = 288; +/// Implode uses max 16-bit codewords. +const MAX_HUFFMAN_BITS: usize = 16; +/// Seems a good trade-off. +const HUFFMAN_LOOKUP_TABLE_BITS: u8 = 8; + +pub struct HuffmanDecoder { + /// Lookup table for fast decoding of short codewords. + pub table: Vec, + /// "Sentinel bits" value for each codeword length. + pub sentinel_bits: Vec, + /// First symbol index minus first codeword mod 2**16 for each length. + pub offset_first_sym_idx: Vec, + /// Map from symbol index to symbol. + pub syms: Vec, + // num_syms:usize +} + +impl Default for HuffmanDecoder { + fn default() -> Self { + let syms = vec![0; MAX_HUFFMAN_SYMBOLS]; + let table = vec![TableEntry::default(); 1 << HUFFMAN_LOOKUP_TABLE_BITS]; + Self { + table, + sentinel_bits: vec![0; MAX_HUFFMAN_BITS + 1], + offset_first_sym_idx: vec![0; MAX_HUFFMAN_BITS + 1], + syms, + } + } +} + +/// Reverse the n least significant bits of x. +/// The (16 - n) most significant bits of the result will be zero. +pub fn reverse_lsb(x: u16, n: usize) -> u16 { + debug_assert!(n > 0); + debug_assert!(n <= 16); + x.reverse_bits() >> (16 - n) +} + +/// Initialize huffman decoder d for a code defined by the n codeword lengths. +/// Returns false if the codeword lengths do not correspond to a valid prefix +/// code. 
+impl HuffmanDecoder { + pub fn init(&mut self, lengths: &[u8], n: usize) -> std::io::Result<()> { + let mut count = [0; MAX_HUFFMAN_BITS + 1]; + let mut code = [0; MAX_HUFFMAN_BITS + 1]; + let mut sym_idx = [0; MAX_HUFFMAN_BITS + 1]; + // Zero-initialize the lookup table. + self.table.fill(TableEntry::default()); + + // Count the number of codewords of each length. + for i in 0..n { + debug_assert!(lengths[i] as usize <= MAX_HUFFMAN_BITS); + count[lengths[i] as usize] += 1; + } + count[0] = 0; // Ignore zero-length codewords. + // Compute sentinel_bits and offset_first_sym_idx for each length. + code[0] = 0; + sym_idx[0] = 0; + for l in 1..=MAX_HUFFMAN_BITS { + // First canonical codeword of this length. + code[l] = ((code[l - 1] + count[l - 1]) << 1) as u16; + + if count[l] != 0 && code[l] as u32 + count[l] as u32 - 1 > (1u32 << l) - 1 { + // The last codeword is longer than l bits. + return Err(Error::new( + io::ErrorKind::InvalidData, + "the last codeword is longer than len bits", + )); + } + + let s = ((code[l] as u32 + count[l] as u32) << (MAX_HUFFMAN_BITS - l)) as u32; + self.sentinel_bits[l] = s; + debug_assert!(self.sentinel_bits[l] >= code[l] as u32, "No overflow!"); + + sym_idx[l] = sym_idx[l - 1] + count[l - 1]; + self.offset_first_sym_idx[l] = sym_idx[l].wrapping_sub(code[l]); + } + + // Build mapping from index to symbol and populate the lookup table. + lengths + .iter() + .enumerate() + .take(n) + .for_each(|(i, code_len)| { + let l = *code_len as usize; + if l == 0 { + return; + } + + self.syms[sym_idx[l] as usize] = i as u16; + sym_idx[l] += 1; + + if l <= HUFFMAN_LOOKUP_TABLE_BITS as usize { + self.table_insert(i, l, code[l]); + code[l] += 1; + } + }); + + Ok(()) + } + + pub fn table_insert(&mut self, sym: usize, len: usize, codeword: u16) { + debug_assert!(len <= HUFFMAN_LOOKUP_TABLE_BITS as usize); + + let codeword = reverse_lsb(codeword, len); // Make it LSB-first. 
+ let pad_len = HUFFMAN_LOOKUP_TABLE_BITS as usize - len; + + // Pad the pad_len upper bits with all bit combinations. + for padding in 0..(1 << pad_len) { + let index = (codeword | (padding << len)) as usize; + debug_assert!(sym <= u16::MAX as usize); + self.table[index].sym = sym as u16; + debug_assert!(len <= u8::MAX as usize); + self.table[index].len = len as u8; + } + } + + /// Use the decoder d to decode a symbol from the LSB-first zero-padded bits. + /// Returns the decoded symbol number or an error if no symbol could be decoded. + /// *num_used_bits will be set to the number of bits used to decode the symbol, + /// or zero if no symbol could be decoded. + pub fn huffman_decode( + &mut self, + length: u64, + is: &mut BitReader, + ) -> std::io::Result { + // First try the lookup table. + let read_bits1 = (HUFFMAN_LOOKUP_TABLE_BITS as u64).min(length - is.position_in_bits()?); + let lookup_bits = !is.read::(read_bits1 as u32)? as usize; + debug_assert!(lookup_bits < self.table.len()); + if self.table[lookup_bits].len != 0 { + debug_assert!(self.table[lookup_bits].len <= HUFFMAN_LOOKUP_TABLE_BITS); + is.seek_bits(io::SeekFrom::Current( + -(read_bits1 as i64) + self.table[lookup_bits].len as i64, + ))?; + return Ok(self.table[lookup_bits].sym); + } + + // Then do canonical decoding with the bits in MSB-first order. + let read_bits2 = (HUFFMAN_LOOKUP_TABLE_BITS as u64).min(length - is.position_in_bits()?); + let mut bits = reverse_lsb( + (lookup_bits | ((!is.read::(read_bits2 as u32)? 
as usize) << read_bits1)) as u16, + MAX_HUFFMAN_BITS, + ); + + for l in HUFFMAN_LOOKUP_TABLE_BITS as usize + 1..=MAX_HUFFMAN_BITS { + if (bits as u32) < self.sentinel_bits[l] { + bits >>= MAX_HUFFMAN_BITS - l; + let sym_idx = (self.offset_first_sym_idx[l] as usize + bits as usize) & 0xFFFF; + //assert(sym_idx < self.num_syms); + is.seek_bits(io::SeekFrom::Current( + -(read_bits1 as i64 + read_bits2 as i64) + l as i64, + ))?; + return Ok(self.syms[sym_idx]); + } + } + Err(Error::new( + io::ErrorKind::InvalidData, + "huffman decode failed", + )) + } +} + +#[cfg(test)] +mod tests { + use std::io::Cursor; + + use bitstream_io::{BitReader, LittleEndian}; + + use super::HuffmanDecoder; + #[test] + fn test_huffman_decode_basic() { + let lens = [ + 3, // sym 0: 000 + 3, // sym 1: 001 + 3, // sym 2: 010 + 3, // sym 3: 011 + 3, // sym 4: 100 + 3, // sym 5: 101 + 4, // sym 6: 1100 + 4, // sym 7: 1101 + 0, // sym 8: + 0, // sym 9: + 0, // sym 10: + 0, // sym 11: + 0, // sym 12: + 0, // sym 13: + 0, // sym 14: + 0, // sym 15: + 6, // sym 16: 111110 + 5, // sym 17: 11110 + 4, // sym 18: 1110 + ]; + + let mut d = HuffmanDecoder::default(); + d.init(&lens, lens.len()).unwrap(); + + // 000 (msb-first) -> 000 (lsb-first) + assert_eq!( + d.huffman_decode( + 8, + &mut BitReader::endian(&mut Cursor::new(vec![!0x0]), LittleEndian) + ) + .unwrap(), + 0 + ); + + /* 011 (msb-first) -> 110 (lsb-first)*/ + assert_eq!( + d.huffman_decode( + 8, + &mut BitReader::endian(&mut Cursor::new(vec![!0b110]), LittleEndian) + ) + .unwrap(), + 0b011 + ); + + /* 11110 (msb-first) -> 01111 (lsb-first)*/ + assert_eq!( + d.huffman_decode( + 8, + &mut BitReader::endian(&mut Cursor::new(vec![!0b1111]), LittleEndian) + ) + .unwrap(), + 0b10001 + ); + + /* 111110 (msb-first) -> 011111 (lsb-first)*/ + assert_eq!( + d.huffman_decode( + 8, + &mut BitReader::endian(&mut Cursor::new(vec![!0b11111]), LittleEndian) + ) + .unwrap(), + 0b10000 + ); + + /* 1111111 (msb-first) -> 1111111 (lsb-first)*/ + assert!(d + 
.huffman_decode( 8, &mut BitReader::endian(&mut Cursor::new(vec![!0x7f]), LittleEndian) ) .is_err()); } } diff --git a/src/legacy/implode.rs b/src/legacy/implode.rs new file mode 100644 index 000000000..2a8c622f6 --- /dev/null +++ b/src/legacy/implode.rs @@ -0,0 +1,217 @@ +use super::huffman::HuffmanDecoder; +use super::lz77::lz77_output_backref; +use bitstream_io::{BitRead, BitReader, Endianness, LittleEndian}; +use std::collections::VecDeque; +use std::io::{self, copy, Cursor, Error, Read, Result}; + +/// Initialize the Huffman decoder d with num_lens codeword lengths read from is. +/// Returns an error if the input is invalid. +fn read_huffman_code( is: &mut BitReader, num_lens: usize, ) -> std::io::Result { + let mut lens = [0; 1 << 8]; + let mut len_count = [0u16; 17]; /* u16: up to num_lens (256) codewords may share one length, which would overflow a u8 counter */ + // debug_assert!(num_lens <= sizeof(lens) / sizeof(lens[0])); + + // Number of bytes representing the Huffman code. + let byte = is.read::(8)?; + let num_bytes = byte as usize + 1; /* widen before +1: byte == 255 would overflow u8 */ + + let mut codeword_idx: u16 = 0; /* u16: may legitimately reach num_lens (up to 256) */ + for _byte_idx in 0..num_bytes { + let byte = is.read::(8)?; + + let codeword_len = (byte & 0xf) + 1; /* Low four bits plus one. */ + let run_length = (byte >> 4) + 1; /* High four bits plus one. */ + + debug_assert!(codeword_len >= 1 && codeword_len <= 16); + //debug_assert!(codeword_len < sizeof(len_count) / sizeof(len_count[0])); + len_count[codeword_len as usize] += run_length as u16; + + if codeword_idx as usize + run_length as usize > num_lens { /* widen before adding: the u8 sum could overflow and bypass this bound check */ + return Err(Error::new( + io::ErrorKind::InvalidData, + "too many codeword lengths", + )); + } + for _ in 0..run_length { + debug_assert!((codeword_idx as usize) < num_lens); + lens[codeword_idx as usize] = codeword_len as u8; + codeword_idx += 1; + } + } + + debug_assert!(codeword_idx as usize <= num_lens); + if (codeword_idx as usize) < num_lens { + return Err(Error::new( + io::ErrorKind::InvalidData, + "not enough codeword lengths", + )); + } + + // Check that the Huffman tree is full. 
+ let mut avail_codewords = 1; + for i in 1..=16 { + debug_assert!(avail_codewords >= 0); + avail_codewords *= 2; + avail_codewords -= len_count[i] as i32; + if avail_codewords < 0 { + return Err(Error::new( + io::ErrorKind::InvalidData, + "huffman tree is not full", + )); + } + } + if avail_codewords != 0 { + // Not all codewords were used. + return Err(Error::new( + io::ErrorKind::InvalidData, + "not all codewords were used", + )); + } + + let mut d = HuffmanDecoder::default(); + d.init(&lens, num_lens)?; + Ok(d) +} + +fn hwexplode( + src: &[u8], + uncomp_len: usize, + large_wnd: bool, + lit_tree: bool, + pk101_bug_compat: bool, + dst: &mut VecDeque, +) -> std::io::Result<()> { + let bit_length = src.len() as u64 * 8; + let mut is = BitReader::endian(Cursor::new(&src), LittleEndian); + let mut lit_decoder_opt = if lit_tree { + Some(read_huffman_code(&mut is, 256)?) + } else { + None + }; + let mut len_decoder = read_huffman_code(&mut is, 64)?; + let mut dist_decoder = read_huffman_code(&mut is, 64)?; + let min_len = if pk101_bug_compat && large_wnd { + 3 + } else if !pk101_bug_compat && lit_tree { + 3 + } else { + 2 + }; + let dist_low_bits = if large_wnd { 7 } else { 6 }; + while dst.len() < uncomp_len { + let is_literal = is.read_bit()?; + if is_literal { + // Literal. + let sym; + if let Some(lit_decoder) = &mut lit_decoder_opt { + sym = lit_decoder.huffman_decode(bit_length, &mut is)?; + } else { + sym = is.read::(8)? as u16; + } + debug_assert!(sym <= u8::MAX as u16); + dst.push_back(sym as u8); + continue; + } + + // Read the low dist bits. + let mut dist = is.read::(dist_low_bits)?; + // Read the Huffman-encoded high dist bits. + let sym = dist_decoder.huffman_decode(bit_length, &mut is)?; + dist |= (sym as u16) << dist_low_bits; + dist += 1; + + // Read the Huffman-encoded len. + let sym = len_decoder.huffman_decode(bit_length, &mut is)?; + let mut len = (sym + min_len) as usize; + + if sym == 63 { + // Read an extra len byte. + len += is.read::(8)? 
as usize; + } + let len = len.min(uncomp_len - dst.len()); + if len <= uncomp_len - dst.len() && dist as usize <= dst.len() { + // Enough room and no implicit zeros; chunked copy. + lz77_output_backref(dst, dist as usize, len); + } else { + // Copy, handling overlap and implicit zeros. + for _i in 0..len { + if dist as usize > dst.len() { + dst.push_back(0); + continue; + } + dst.push_back(dst[dst.len() - dist as usize]); + } + } + } + Ok(()) +} + +#[derive(Debug)] +pub struct ImplodeDecoder { + compressed_reader: R, + uncompressed_size: u64, + stream_read: bool, + large_wnd: bool, + lit_tree: bool, + stream: VecDeque, +} + +impl ImplodeDecoder { + pub fn new(inner: R, uncompressed_size: u64, flags: u16) -> Self { + let large_wnd = (flags & 2) != 0; + let lit_tree = (flags & 4) != 0; + ImplodeDecoder { + compressed_reader: inner, + uncompressed_size, + stream_read: false, + large_wnd, + lit_tree, + stream: VecDeque::new(), + } + } + + pub fn finish(mut self) -> Result> { + copy(&mut self.compressed_reader, &mut self.stream)?; + Ok(self.stream) + } +} + +impl Read for ImplodeDecoder { + fn read(&mut self, buf: &mut [u8]) -> Result { + if !self.stream_read { + self.stream_read = true; + let mut compressed_bytes = Vec::new(); + if let Err(err) = self.compressed_reader.read_to_end(&mut compressed_bytes) { + return Err(err.into()); + } + hwexplode( + &compressed_bytes, + self.uncompressed_size as usize, + self.large_wnd, + self.lit_tree, + false, + &mut self.stream, + )?; + } + let bytes_read = self.stream.len().min(buf.len()); + buf[..bytes_read].copy_from_slice(&self.stream.drain(..bytes_read).collect::>()); + Ok(bytes_read) + } +} + +#[cfg(test)] +mod tests { + use super::hwexplode; + use std::collections::VecDeque; + const HAMLET_256: &[u8; 249] = include_bytes!("../../tests/implode_hamlet_256.bin"); + + #[test] + fn test_explode_hamlet_256() { + let mut dst = VecDeque::new(); + hwexplode(HAMLET_256, 256, false, false, false, &mut dst).unwrap(); + 
assert_eq!(dst.len(), 256); + } +} diff --git a/src/legacy/lz77.rs b/src/legacy/lz77.rs new file mode 100644 index 000000000..939df9b90 --- /dev/null +++ b/src/legacy/lz77.rs @@ -0,0 +1,10 @@ +use std::collections::VecDeque; + +/// Output the (dist,len) back reference at dst_pos in dst. +pub(crate) fn lz77_output_backref(dst: &mut VecDeque, dist: usize, len: usize) { + // debug_assert!(dist <= dst_pos, "cannot reference before beginning of dst"); + + for _ in 0..len { + dst.push_back(dst[dst.len() - dist]); + } +} diff --git a/src/legacy/mod.rs b/src/legacy/mod.rs new file mode 100644 index 000000000..15897e5ad --- /dev/null +++ b/src/legacy/mod.rs @@ -0,0 +1,8 @@ +mod huffman; +mod lz77; +pub mod shrink; +pub use shrink::*; +pub mod reduce; +pub use reduce::*; +pub mod implode; +pub use implode::*; diff --git a/src/legacy/reduce.rs b/src/legacy/reduce.rs new file mode 100644 index 000000000..56fe82948 --- /dev/null +++ b/src/legacy/reduce.rs @@ -0,0 +1,293 @@ +use std::collections::VecDeque; +use std::io::{self, copy, Read, Result}; + +use crate::legacy::lz77::lz77_output_backref; +use bitstream_io::{BitRead, BitReader, Endianness, LittleEndian}; + +/// Number of bits used to represent indices in a follower set of size n. +fn follower_idx_bw(n: u8) -> u8 { + debug_assert!(n <= 32); + match n { + 0 => 0, + 1 => 1, + _ => 8 - (n - 1).leading_zeros() as u8, + } +} + +#[derive(Default, Clone, Copy)] +struct FollowerSet { + size: u8, + idx_bw: u8, + followers: [u8; 32], +} + +/// Read the follower sets from is into fsets. Returns true on success. 
+type FollowerSetArray = [FollowerSet; u8::MAX as usize + 1]; + +fn read_follower_sets( + is: &mut BitReader, +) -> io::Result { + let mut fsets = [FollowerSet::default(); u8::MAX as usize + 1]; + for i in (0..=u8::MAX as usize).rev() { + let n = is.read::(6)?; + if n > 32 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "invalid follower set", + )); + } + fsets[i].size = n; + fsets[i].idx_bw = follower_idx_bw(n); + + for j in 0..fsets[i].size as usize { + fsets[i].followers[j] = is.read::(8)?; + } + } + + Ok(fsets) +} + +/// Read the next byte from is, decoded based on prev_byte and the follower sets. +/// The byte is returned in *out_byte. The function returns true on success, +/// and false on bad data or end of input. +fn read_next_byte( + is: &mut BitReader, + prev_byte: u8, + fsets: &mut FollowerSetArray, +) -> io::Result { + if fsets[prev_byte as usize].size == 0 // No followers + || is.read::(1)? == 1 + // Indicates next symbol is a literal byte + { + return Ok(is.read::(8)?); + } + + // The bits represent the index of a follower byte. + let idx_bw = fsets[prev_byte as usize].idx_bw; + let follower_idx = is.read::(idx_bw as u32)? as usize; + if follower_idx >= fsets[prev_byte as usize].size as usize { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "invalid follower index", + )); + } + Ok(fsets[prev_byte as usize].followers[follower_idx]) +} + +fn max_len(comp_factor: u8) -> usize { + let v_len_bits = (8 - comp_factor) as usize; + debug_assert!(comp_factor >= 1 && comp_factor <= 4); + // Bits in V + extra len byte + implicit 3. + ((1 << v_len_bits) - 1) + u8::MAX as usize + 3 +} + +fn max_dist(comp_factor: u8) -> usize { + debug_assert!(comp_factor >= 1 && comp_factor <= 4); + let v_dist_bits = comp_factor as usize; + // Bits in V * 256 + W byte + implicit 1. */ + 1 << (v_dist_bits + 8) +} + +const DLE_BYTE: u8 = 0x90; + +/// Get the n least significant bits of x. 
+fn lsb(x: u8, n: u8) -> u8 { + if n >= 8 { + return x; + } + x & ((1 << n) - 1) +} + +fn hwexpand( + src: &[u8], + uncomp_len: usize, + comp_factor: u8, + dst: &mut VecDeque, +) -> io::Result<()> { + debug_assert!(comp_factor >= 1 && comp_factor <= 4); + + let mut is = BitReader::endian(src, LittleEndian); + let mut fsets = read_follower_sets(&mut is)?; + + // Number of bits in V used for backref length. + let v_len_bits = 8 - comp_factor; + + let mut curr_byte = 0; // The first "previous byte" is implicitly zero. + + while dst.len() < uncomp_len { + // Read a literal byte or DLE marker. + curr_byte = read_next_byte(&mut is, curr_byte, &mut fsets)?; + if curr_byte != DLE_BYTE { + // Output a literal byte. + dst.push_back(curr_byte); + continue; + } + + // Read the V byte which determines the length. + curr_byte = read_next_byte(&mut is, curr_byte, &mut fsets)?; + if curr_byte == 0 { + // Output a literal DLE byte. + dst.push_back(DLE_BYTE); + continue; + } + let v = curr_byte; + let mut len = lsb(v, v_len_bits) as usize; + if len == (1 << v_len_bits) - 1 { + // Read an extra length byte. + curr_byte = read_next_byte(&mut is, curr_byte, &mut fsets)?; + len += curr_byte as usize; + } + len += 3; + + // Read the W byte, which together with V gives the distance. + curr_byte = read_next_byte(&mut is, curr_byte, &mut fsets)?; + let dist = (((v as usize) >> v_len_bits) << 8) + curr_byte as usize + 1; + + debug_assert!(len <= max_len(comp_factor)); + debug_assert!(dist as usize <= max_dist(comp_factor)); + + // Output the back reference. + if len <= uncomp_len - dst.len() && dist as usize <= dst.len() { + // Enough room and no implicit zeros; chunked copy. + lz77_output_backref(dst, dist as usize, len); + } else { + // Copy, handling overlap and implicit zeros. 
+ for _i in 0..len { + if dist as usize > dst.len() { + dst.push_back(0); + continue; + } + dst.push_back(dst[dst.len() - dist as usize]); + } + } + } + Ok(()) +} + +#[derive(Debug)] +pub struct ReduceDecoder { + compressed_reader: R, + uncompressed_size: u64, + stream_read: bool, + comp_factor: u8, + stream: VecDeque, +} + +impl ReduceDecoder { + pub fn new(inner: R, uncompressed_size: u64, comp_factor: u8) -> Self { + ReduceDecoder { + compressed_reader: inner, + uncompressed_size, + stream_read: false, + comp_factor, + stream: VecDeque::new(), + } + } + + pub fn finish(mut self) -> Result> { + copy(&mut self.compressed_reader, &mut self.stream)?; + Ok(self.stream) + } +} + +impl Read for ReduceDecoder { + fn read(&mut self, buf: &mut [u8]) -> Result { + if !self.stream_read { + self.stream_read = true; + let mut compressed_bytes = Vec::new(); + if let Err(err) = self.compressed_reader.read_to_end(&mut compressed_bytes) { + return Err(err.into()); + } + hwexpand( + &compressed_bytes, + self.uncompressed_size as usize, + self.comp_factor, + &mut self.stream, + )?; + } + let bytes_read = self.stream.len().min(buf.len()); + buf[..bytes_read].copy_from_slice(&self.stream.drain(..bytes_read).collect::>()); + Ok(bytes_read) + } +} + +#[cfg(test)] +mod tests { + use super::hwexpand; + use crate::legacy::reduce::{follower_idx_bw, lsb, max_dist}; + use std::collections::VecDeque; + const HAMLET_2048: &[u8; 1285] = include_bytes!("../../tests/reduce_hamlet_2048.bin"); + + #[test] + fn test_lsb() { + assert_eq!(lsb(0xFF, 8), 0xFF); + for i in 0..7 { + assert_eq!(lsb(0xFF, i), (1 << i) - 1); + } + } + + #[test] + fn test_expand_hamlet2048() { + let mut dst = VecDeque::new(); + hwexpand(HAMLET_2048, 2048, 4, &mut dst).unwrap(); + assert_eq!(dst.len(), 2048); + } + + /* + Put some text first to make PKZIP actually use Reduce compression. + Target the code path which copies a zero when dist > current position. 
+ + $ curl -O http://cd.textfiles.com/originalsw/25/pkz092.exe + $ dosbox -c "mount c ." -c "c:" -c "pkz092" -c "exit" + $ dd if=hamlet.txt bs=1 count=2048 > a + $ dd if=/dev/zero bs=1 count=1024 >> a + $ dosbox -c "mount c ." -c "c:" -c "pkzip -ea4 a.zip a" -c "exit" + $ xxd -i -s 31 -l $(expr $(find A.ZIP -printf %s) - 100) A.ZIP + */ + const ZEROS_REDUCED: &[u8; 1297] = include_bytes!("../../tests/reduce_zero_reduced.bin"); + + #[test] + fn test_expand_zeros() { + let mut dst = VecDeque::new(); + hwexpand(ZEROS_REDUCED, 2048 + 1024, 4, &mut dst).unwrap(); + assert_eq!(dst.len(), 2048 + 1024); + for i in 0..(1 << 10) { + assert_eq!(dst[(1 << 11) + i], 0); + } + } + + fn orig_follower_idx_bw(n: u8) -> u8 { + if n > 16 { + return 5; + } + if n > 8 { + return 4; + } + if n > 4 { + return 3; + } + if n > 2 { + return 2; + } + if n > 0 { + return 1; + } + return 0; + } + + #[test] + fn test_follower_idx_bw() { + for i in 0..=32 { + assert_eq!(orig_follower_idx_bw(i), follower_idx_bw(i)); + } + } + + #[test] + fn test_max_dist() { + for i in 1..=4 { + let v_dist_bits = i as usize; + let c = 1 << (v_dist_bits + 8); + assert_eq!(max_dist(i), c); + } + } +} diff --git a/src/legacy/shrink.rs b/src/legacy/shrink.rs new file mode 100644 index 000000000..f9e8a2d67 --- /dev/null +++ b/src/legacy/shrink.rs @@ -0,0 +1,397 @@ +use std::collections::VecDeque; +use std::io::{self, copy, Error, Read}; + +use bitstream_io::{BitRead, BitReader, Endianness, LittleEndian}; + +const MIN_CODE_SIZE: u8 = 9; +const MAX_CODE_SIZE: u8 = 13; + +const MAX_CODE: usize = (1 << MAX_CODE_SIZE) - 1; +const CONTROL_CODE: usize = 256; +const INC_CODE_SIZE: u16 = 1; +const PARTIAL_CLEAR: u16 = 2; + +// const HASH_BITS: usize = MAX_CODE_SIZE + 1; /* For a load factor of 0.5. 
*/ +// const HASHTAB_SIZE: usize = 1 << HASH_BITS; +const UNKNOWN_LEN: u16 = u16::MAX; + +struct CodeQueue { + next_idx: usize, + codes: Vec>, +} + +impl CodeQueue { + fn new() -> Self { + let mut codes = vec![None; MAX_CODE as usize - CONTROL_CODE + 1]; + for (i, code) in (CONTROL_CODE as u16 + 1..=MAX_CODE as u16).enumerate() { + codes[i] = Some(code); + } + Self { next_idx: 0, codes } + } + + // Return the next code in the queue, or INVALID_CODE if the queue is empty. + fn next(&self) -> Option { + // assert(q->next_idx < sizeof(q->codes) / sizeof(q->codes[0])); + if let Some(Some(next)) = self.codes.get(self.next_idx) { + Some(*next) + } else { + None + } + } + + /// Return and remove the next code from the queue, or return INVALID_CODE if + /// the queue is empty. + fn remove_next(&mut self) -> Option { + let res = self.next(); + if res.is_some() { + self.next_idx += 1; + } + res + } +} + +#[derive(Clone, Debug, Copy)] +struct Codetab { + last_dst_pos: usize, + prefix_code: Option, + len: u16, + ext_byte: u8, +} + +impl Default for Codetab { + fn default() -> Self { + Self { + prefix_code: None, + ext_byte: 0, + len: 0, + last_dst_pos: 0, + } + } +} + +impl Codetab { + pub fn create_new() -> [Self; MAX_CODE + 1] { + let mut codetab = (0..=u8::MAX) + .map(|i| Codetab { + prefix_code: Some(i as u16), + ext_byte: i, + len: 1, + last_dst_pos: 0, + }) + .collect::>(); + codetab.resize(MAX_CODE + 1, Codetab::default()); + codetab.try_into().unwrap() + } +} + +fn unshrink_partial_clear(codetab: &mut [Codetab], queue: &mut CodeQueue) { + let mut is_prefix = [false; MAX_CODE + 1]; + + // Scan for codes that have been used as a prefix. + for i in CONTROL_CODE + 1..=MAX_CODE { + if let Some(prefix_code) = codetab[i].prefix_code { + is_prefix[prefix_code as usize] = true; + } + } + + // Clear "non-prefix" codes in the table; populate the code queue. 
+ let mut code_queue_size = 0; + for i in CONTROL_CODE + 1..=MAX_CODE { /* inclusive: MAX_CODE itself must be cleared/re-queued — matches the prefix-scan loop above and CodeQueue::new() */ + if !is_prefix[i] { + codetab[i].prefix_code = None; + queue.codes[code_queue_size] = Some(i as u16); + code_queue_size += 1; + } + } + queue.codes[code_queue_size] = None; // End-of-queue marker. + queue.next_idx = 0; +} + +/// Read the next code from the input stream. Control codes (code-size increase, +/// partial clear) are handled internally and the next regular code is returned. +/// Returns Ok(None) if the stream ends while reading a control code's operand. +fn read_code( is: &mut BitReader, code_size: &mut u8, codetab: &mut [Codetab], queue: &mut CodeQueue, ) -> io::Result> { + // assert(sizeof(code) * CHAR_BIT >= *code_size); + let code = is.read::(*code_size as u32)?; + + // Handle regular codes (the common case). + if code != CONTROL_CODE as u16 { + return Ok(Some(code)); + } + + // Handle control codes. + if let Ok(control_code) = is.read::(*code_size as u32) { + match control_code { + INC_CODE_SIZE => { + if *code_size >= MAX_CODE_SIZE { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "tried to increase code size when already at maximum", + )); + } + *code_size += 1; + } + PARTIAL_CLEAR => { + unshrink_partial_clear(codetab, queue); + } + _ => { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Invalid control code {}", control_code), )); + } + } + /* NOTE(review): tail recursion here — depth is bounded by the number of consecutive control codes in the input; consider rewriting as a loop if adversarial inputs are a concern */ + return read_code(is, code_size, codetab, queue); + } + Ok(None) } + +/// Output the string represented by code into dst. On success, updates *first_byte +/// and *len with the first byte and length of the output string, respectively. +fn output_code( code: u16, dst: &mut VecDeque, prev_code: u16, codetab: &mut [Codetab], queue: &mut CodeQueue, first_byte: &mut u8, len: &mut usize, ) -> io::Result<()> { + debug_assert!(code <= MAX_CODE as u16 && code != CONTROL_CODE as u16); + if code <= u8::MAX as u16 { + // Output literal byte. 
+ *first_byte = code as u8; + *len = 1; + dst.push_back(code as u8); + return Ok(()); + } + + if codetab[code as usize].prefix_code.is_none() + || codetab[code as usize].prefix_code == Some(code) + { + // Reject invalid codes. Self-referential codes may exist in + // the table but cannot be used. + return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid code")); + } + + if codetab[code as usize].len != UNKNOWN_LEN { + // Output string with known length (the common case). + let ct = &codetab[code as usize]; + for i in ct.last_dst_pos..ct.last_dst_pos + ct.len as usize { + dst.push_back(dst[i]); + } + *first_byte = dst[ct.last_dst_pos]; + *len = ct.len as usize; + return Ok(()); + } + + // Output a string of unknown length. This happens when the prefix + // was invalid (due to partial clearing) when the code was inserted into + // the table. The prefix can then become valid when it's added to the + // table at a later point. + let prefix_code = codetab[code as usize].prefix_code.unwrap(); + if cfg!(debug_assertions) { + let tab_entry = codetab[code as usize]; + assert!(tab_entry.len == UNKNOWN_LEN); + assert!(tab_entry.prefix_code.unwrap() as usize > CONTROL_CODE); + } + + if Some(prefix_code) == queue.next() { + /* The prefix code hasn't been added yet, but we were just + about to: the KwKwK case. Add the previous string extended + with its first byte. */ + codetab[prefix_code as usize] = Codetab { + prefix_code: Some(prev_code), + ext_byte: *first_byte, + len: codetab[prev_code as usize].len + 1, + last_dst_pos: codetab[prev_code as usize].last_dst_pos, + }; + dst.push_back(*first_byte); + } else if codetab[prefix_code as usize].prefix_code.is_none() { + // The prefix code is still invalid. + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "invalid prefix code", + )); + } + + // Output the prefix string, then the extension byte. 
+ *len = codetab[prefix_code as usize].len as usize + 1; + let last_dst_pos = dst.len(); + let ct = &codetab[prefix_code as usize]; + for i in ct.last_dst_pos..ct.last_dst_pos + ct.len as usize { + dst.push_back(dst[i]); + } + dst.push_back(codetab[code as usize].ext_byte); + *first_byte = dst[ct.last_dst_pos]; + + // Update the code table now that the string has a length and pos. + debug_assert!(prev_code != code); + codetab[code as usize].len = *len as u16; + codetab[code as usize].last_dst_pos = last_dst_pos; + + Ok(()) +} + +fn hwunshrink(src: &[u8], uncompressed_size: usize, dst: &mut VecDeque) -> io::Result<()> { + let mut codetab = Codetab::create_new(); + let mut queue = CodeQueue::new(); + let mut is = BitReader::endian(src, LittleEndian); + + let mut code_size = MIN_CODE_SIZE; + + // Handle the first code separately since there is no previous code. + let Ok(Some(curr_code)) = read_code(&mut is, &mut code_size, &mut codetab, &mut queue) else { + return Ok(()); + }; + + debug_assert!(curr_code != CONTROL_CODE as u16); + if curr_code > u8::MAX as u16 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "the first code must be a literal", + )); + } + let mut first_byte = curr_code as u8; + codetab[curr_code as usize].last_dst_pos = dst.len(); + dst.push_back(curr_code as u8); + + let mut prev_code = curr_code; + while dst.len() < uncompressed_size { + let Ok(curr_code) = read_code(&mut is, &mut code_size, &mut codetab, &mut queue) else { + break; + }; + + let Some(curr_code) = curr_code else { + return Err(Error::new(io::ErrorKind::InvalidData, "invalid code")); + }; + + let dst_pos = dst.len(); + // Handle KwKwK: next code used before being added. + if Some(curr_code) == queue.next() { + if codetab[prev_code as usize].prefix_code.is_none() { + return Err(Error::new( + io::ErrorKind::InvalidData, + "previous code no longer valid", + )); + } + // Extend the previous code with its first byte. 
+ debug_assert!(curr_code != prev_code); + codetab[curr_code as usize] = Codetab { + prefix_code: Some(prev_code), + ext_byte: first_byte, + len: codetab[prev_code as usize].len + 1, + last_dst_pos: codetab[prev_code as usize].last_dst_pos, + }; + // dst.push_back(first_byte); + } + + // Output the string represented by the current code. + let mut len = 0; + output_code( + curr_code, + dst, + prev_code, + &mut codetab, + &mut queue, + &mut first_byte, + &mut len, + )?; + + // Add a new code to the string table if there's room. + // The string is the previous code's string extended with + // the first byte of the current code's string. + let new_code = queue.remove_next(); + if let Some(new_code) = new_code { + //debug_assert!(codetab[prev_code as usize].last_dst_pos < dst_pos); + let prev_code_entry = codetab[prev_code as usize]; + codetab[new_code as usize] = Codetab { + prefix_code: Some(prev_code), + ext_byte: first_byte, + last_dst_pos: prev_code_entry.last_dst_pos, + len: if prev_code_entry.prefix_code.is_none() { + // prev_code was invalidated in a partial + // clearing. Until that code is re-used, the + // string represented by new_code is + // indeterminate. 
+ UNKNOWN_LEN + } else { + prev_code_entry.len + 1 + }, + }; + } + + codetab[curr_code as usize].last_dst_pos = dst_pos; + prev_code = curr_code; + } + + Ok(()) +} + +#[derive(Debug)] +pub struct ShrinkDecoder { + compressed_reader: R, + stream_read: bool, + uncompressed_size: u64, + stream: VecDeque, +} + +impl ShrinkDecoder { + pub fn new(inner: R, uncompressed_size: u64) -> Self { + ShrinkDecoder { + compressed_reader: inner, + uncompressed_size, + stream_read: false, + stream: VecDeque::new(), + } + } + + pub fn finish(mut self) -> std::io::Result> { + copy(&mut self.compressed_reader, &mut self.stream)?; + Ok(self.stream) + } +} + +impl Read for ShrinkDecoder { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + if !self.stream_read { + self.stream_read = true; + let mut compressed_bytes = Vec::new(); + if let Err(err) = self.compressed_reader.read_to_end(&mut compressed_bytes) { + return Err(err.into()); + } + hwunshrink( + &compressed_bytes, + self.uncompressed_size as usize, + &mut self.stream, + )?; + } + let bytes_read = self.stream.len().min(buf.len()); + buf[..bytes_read].copy_from_slice(&self.stream.drain(..bytes_read).collect::>()); + Ok(bytes_read) + } +} + +#[cfg(test)] +mod tests { + use crate::legacy::shrink::hwunshrink; + use std::collections::VecDeque; + + const LZW_FIG5: &[u8; 17] = b"ababcbababaaaaaaa"; + const LZW_FIG5_SHRUNK: [u8; 12] = [ + 0x61, 0xc4, 0x04, 0x1c, 0x23, 0xb0, 0x60, 0x98, 0x83, 0x08, 0xc3, 0x00, + ]; + + #[test] + fn test_unshrink_lzw_fig5() { + let mut dst = VecDeque::with_capacity(LZW_FIG5.len()); + hwunshrink(&LZW_FIG5_SHRUNK, LZW_FIG5.len(), &mut dst).unwrap(); + assert_eq!(dst, LZW_FIG5); + } +} diff --git a/src/lib.rs b/src/lib.rs index 9937d72a4..f3803bfc7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,6 +53,8 @@ mod types; pub mod write; mod zipcrypto; pub use extra_fields::ExtraField; +#[cfg(feature = "legacy-zip")] +mod legacy; #[doc = "Unstable APIs\n\ \ diff --git a/src/read.rs b/src/read.rs index 
39686012d..7aeb7f407 100644 --- a/src/read.rs +++ b/src/read.rs @@ -6,6 +6,8 @@ use crate::compression::{CompressionMethod, Decompressor}; use crate::cp437::FromCp437; use crate::crc32::Crc32Reader; use crate::extra_fields::{ExtendedTimestamp, ExtraField}; +#[cfg(feature = "legacy-zip")] +use crate::legacy::{ImplodeDecoder, ReduceDecoder, ShrinkDecoder}; use crate::read::zip_archive::{Shared, SharedBuilder}; use crate::result::{ZipError, ZipResult}; use crate::spec::{self, CentralDirectoryEndInfo, DataAndPosition, FixedSizeBlock, Pod}; @@ -419,10 +421,13 @@ pub(crate) fn make_crypto_reader<'a>( Ok(reader) } +#[allow(unused_variables)] pub(crate) fn make_reader( compression_method: CompressionMethod, crc32: u32, reader: CryptoReader, + uncompressed_size: u64, + flags: u16, ) -> ZipResult { let ae2_encrypted = reader.is_ae2_encrypted(); @@ -1145,6 +1150,7 @@ fn central_header_to_zip_file_inner( crc32, compressed_size: compressed_size.into(), uncompressed_size: uncompressed_size.into(), + flags, file_name, file_name_raw, extra_field: Some(Arc::new(extra_field.to_vec())), diff --git a/src/types.rs b/src/types.rs index 2861d97ac..27b93ce5e 100644 --- a/src/types.rs +++ b/src/types.rs @@ -445,6 +445,8 @@ pub struct ZipFileData { pub compressed_size: u64, /// Size of the file when extracted pub uncompressed_size: u64, + /// ZIP flags + pub flags: u16, /// Name of the file pub file_name: Box, /// Raw file name. To be used when file_name was incorrectly decoded. 
@@ -1112,6 +1114,7 @@ mod test { crc32: 0, compressed_size: 0, uncompressed_size: 0, + flags: 0, file_name: file_name.clone().into_boxed_str(), file_name_raw: file_name.into_bytes().into_boxed_slice(), extra_field: None, diff --git a/src/write.rs b/src/write.rs index 96b72a2f3..48da22173 100644 --- a/src/write.rs +++ b/src/write.rs @@ -1670,6 +1670,18 @@ impl GenericZipWriter { Ok(Box::new(|bare| Storer(bare))) } } + #[cfg(feature = "legacy-zip")] + CompressionMethod::Shrink => Err(ZipError::UnsupportedArchive( + "Shrink compression unsupported", + )), + #[cfg(feature = "legacy-zip")] + CompressionMethod::Reduce(_) => Err(ZipError::UnsupportedArchive( + "Reduce compression unsupported", + )), + #[cfg(feature = "legacy-zip")] + CompressionMethod::Implode => Err(ZipError::UnsupportedArchive( + "Implode compression unsupported", + )), #[cfg(feature = "_deflate-any")] CompressionMethod::Deflated => { let default = if cfg!(all( diff --git a/tests/data/folder/first.txt b/tests/data/folder/first.txt new file mode 100644 index 000000000..9345759e4 --- /dev/null +++ b/tests/data/folder/first.txt @@ -0,0 +1 @@ +The play of Hamlet is above all others the most stupendous monument of Shakespeare's genius, standing as a beacon to command the wonder and admiration of the world, and as a memorial to future generations, that the mind of its author was moved by little less than inspiration. Lear, with its sublime picture of human misery;—Othello, with its harrowing overthrow of a nature great and amiable;—Macbeth, with its fearful murder of a monarch, whose "virtues plead like angels trumpet-tongued against the deep damnation of his taking off,"—severally exhibit, in the most pre-eminent degree, all those mighty elements which constitute the perfection of tragic art—the grand, the pitiful, and the terrible. Hamlet is a history of mind—a tragedy of thought. 
It contains the deepest philosophy, and most profound wisdom; yet speaks the language of the heart, touching the secret spring of every sense and feeling. Here we have no ideal exaltation of character, but life with its blended faults ands,—a gentle nature unstrung by passing events, and thus rendered "out of tune and harsh." diff --git a/tests/data/implode.zip b/tests/data/implode.zip new file mode 100644 index 000000000..9ce50ed2a Binary files /dev/null and b/tests/data/implode.zip differ diff --git a/tests/data/reduce.zip b/tests/data/reduce.zip new file mode 100644 index 000000000..c413c87ae Binary files /dev/null and b/tests/data/reduce.zip differ diff --git a/tests/data/shrink.zip b/tests/data/shrink.zip new file mode 100644 index 000000000..efaf97077 Binary files /dev/null and b/tests/data/shrink.zip differ diff --git a/tests/implode_hamlet_256.bin b/tests/implode_hamlet_256.bin new file mode 100644 index 000000000..02806fc7a --- /dev/null +++ b/tests/implode_hamlet_256.bin @@ -0,0 +1,2 @@ + #67h<DP!ToՖKѺt˺[VYE}[ m˶,] y5-[i7ȩhî-;nٰrMmڹ ˼77V.Hh˂; 6[e·^mtM&Y ct?c o8ٚ>@k6|t~Al_ +yMTaW+]oI.A.ȶa?Mkc \ No newline at end of file diff --git a/tests/legacy_zip.rs b/tests/legacy_zip.rs new file mode 100644 index 000000000..98f88ce7a --- /dev/null +++ b/tests/legacy_zip.rs @@ -0,0 +1,55 @@ +#![cfg(feature = "legacy-zip")] + +use std::io::{self, Read}; +use zip::ZipArchive; + +#[test] +fn decompress_shrink() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/shrink.zip")); + let mut archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip file"); + + let mut file = archive + .by_name("FIRST.TXT") + .expect("couldn't find file in archive"); + assert_eq!("FIRST.TXT", file.name()); + + let mut content = Vec::new(); + file.read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(include_bytes!("data/folder/first.txt"), &content[..]); +} + +#[test] +fn decompress_reduce() { + let mut v = 
Vec::new(); + v.extend_from_slice(include_bytes!("data/reduce.zip")); + let mut archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip file"); + + let mut file = archive + .by_name("first.txt") + .expect("couldn't find file in archive"); + assert_eq!("first.txt", file.name()); + + let mut content = Vec::new(); + file.read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(include_bytes!("data/folder/first.txt"), &content[..]); +} + +#[test] +fn decompress_implode() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/implode.zip")); + let mut archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip file"); + + let mut file = archive + .by_name("first.txt") + .expect("couldn't find file in archive"); + assert_eq!("first.txt", file.name()); + + let mut content = Vec::new(); + file.read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(include_bytes!("data/folder/first.txt"), &content[..]); +} diff --git a/tests/reduce_hamlet_2048.bin b/tests/reduce_hamlet_2048.bin new file mode 100644 index 000000000..4b6ba60df Binary files /dev/null and b/tests/reduce_hamlet_2048.bin differ diff --git a/tests/reduce_zero_reduced.bin b/tests/reduce_zero_reduced.bin new file mode 100644 index 000000000..c0cba06f2 Binary files /dev/null and b/tests/reduce_zero_reduced.bin differ