diff --git a/Cargo.lock b/Cargo.lock index 544c880..ac84935 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,6 +25,18 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "ahash" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.0.1" @@ -91,7 +103,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.39", ] [[package]] @@ -223,7 +235,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.39", ] [[package]] @@ -244,6 +256,15 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +dependencies = [ + "memchr", +] + [[package]] name = "cpufeatures" version = "0.2.7" @@ -321,6 +342,12 @@ dependencies = [ "typenum", ] +[[package]] +name = "dary_heap" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" + [[package]] name = "datasize" version = "0.2.14" @@ -385,6 +412,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.3.1" @@ -537,6 +570,21 @@ dependencies = [ "crunchy", ] +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" + [[package]] name = "heck" version = "0.4.1" @@ -593,12 +641,13 @@ dependencies = [ ] [[package]] -name = "inflate" -version = "0.4.5" +name = "indexmap" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cdb29978cc5797bd8dcc8e5bf7de604891df2a8dc576973d71a281e916db2ff" +checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" dependencies = [ - "adler32", + "equivalent", + "hashbrown 0.14.3", ] [[package]] @@ -682,6 +731,30 @@ version = "0.2.142" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" +[[package]] +name = "libflate" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7d5654ae1795afc7ff76f4365c2c8791b0feb18e8996a96adad8ffd7c3b2bf" +dependencies = [ + "adler32", + "core2", + "crc32fast", + "dary_heap", + "libflate_lz77", +] + +[[package]] +name = "libflate_lz77" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be5f52fb8c451576ec6b79d3f4deb327398bc05bbdbd99021a6e77a4c855d524" +dependencies = [ + "core2", + "hashbrown 0.13.2", + "rle-decode-fast", +] + [[package]] name = "linux-raw-sys" version = "0.3.5" @@ -804,8 +877,9 @@ checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "pdf" -version = "0.8.1" -source = "git+https://github.com/omkar-mohanty/pdf.git?branch=png_flate#340b35c8996bca2ff14babe882a2064d03d5e057" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3afc7e745846405d572daba57a429f30a198d955602aff8a1a9e437c2abfcaa2" dependencies = [ "aes", "bitflags", @@ -814,10 +888,11 @@ dependencies = [ "deflate", "fax", "globalcache", - "inflate", + "indexmap", "istring", "itertools", "jpeg-decoder", + "libflate", "log", "md5", "once_cell", @@ -830,8 +905,9 @@ dependencies = [ [[package]] name = "pdf_derive" -version = "0.1.22" -source = "git+https://github.com/omkar-mohanty/pdf.git?branch=png_flate#340b35c8996bca2ff14babe882a2064d03d5e057" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1038b9cb38dec35eeee9f23eacfb2480087982f9b7e9221efa8034eea9ca2360" dependencies = [ "proc-macro2", "quote", @@ -873,9 +949,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" dependencies = [ "unicode-ident", ] @@ -891,9 +967,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.26" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] @@ -937,6 +1013,12 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" +[[package]] +name = "rle-decode-fast" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" + [[package]] name = "rustix" version = "0.37.15" @@ -1040,9 +1122,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.15" +version = "2.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" dependencies = [ "proc-macro2", "quote", @@ -1297,6 +1379,26 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" +[[package]] +name = "zerocopy" +version = "0.7.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d075cf85bbb114e933343e087b92f2146bac0d55b534cbb8188becf0039948e" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86cd5ca076997b97ef09d3ad65efe811fa68c9e874cb636ccb211223a813b0c2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", +] + [[package]] name = "zune-inflate" version = "0.2.53" diff --git a/Cargo.toml b/Cargo.toml index 4f5fe12..375f2b7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -pdf = {git = "https://github.com/omkar-mohanty/pdf.git" , branch = "png_flate"} +pdf = "0.9.0" clap = { version = "4.2.4" , features = ["derive"] } log = "0.4.0" env_logger = "0.9.0" diff --git a/src/extractor/mod.rs b/src/extractor/mod.rs index 29afb9f..90e5641 100644 --- a/src/extractor/mod.rs +++ b/src/extractor/mod.rs @@ -6,6 +6,7 @@ use pdf::backend::Backend; use pdf::file::Cache; use pdf::file::File; use pdf::file::FileOptions; +use pdf::file::Log; use pdf::object::PageRc; use pdf::object::{Resolve, XObject}; use pdf::PdfError; @@ -18,11 +19,12 @@ pub enum Method<'a> { Bytes(&'a [u8]), } -pub fn get_pages(file: &File) -> Result> +pub fn get_pages(file: &File) -> Result> where T: Backend, K: Cache>>, Y: Cache, Arc>>, + L : Log { Ok(file .pages() @@ -30,21 +32,24 @@ where .collect::>()) } -pub fn get_raw_images(page: PageRc, file: &File) -> Result> +pub fn get_raw_images(page: PageRc, file: &File) -> Result> where T: Backend, K: Cache>>, Y: Cache, Arc>>, + L: Log { let mut images = vec![]; let resources = page.resources()?; + let resolver = file.resolver(); + images.extend( resources .xobjects .iter() - .map(|(_name, &r)| file.get(r).unwrap()) + .map(|(_name, &r)| resolver.get(r).unwrap()) .filter(|o| matches!(**o, pdf::object::XObject::Image(_))), ); @@ -58,7 +63,7 @@ where _ => continue, }; - let data = img.image_data(file)?; + let data = img.image_data(&resolver)?; let img_dict = img.deref().to_owned();