From 0f48f9547e5a1d6f229d6dbb4f168f6d90d7e766 Mon Sep 17 00:00:00 2001 From: Miles Date: Mon, 8 Jan 2024 05:39:56 +0100 Subject: [PATCH] Make libcramjam a better libcramjam w/ C API (#119) * Initial impl of capi feature --------- Co-authored-by: Ben Beasley --- .github/workflows/CI.yml | 2 +- Cargo.lock | 608 +++++++++++++++- Cargo.toml | 1 + cramjam-python/benchmarks/test_bench.py | 3 +- cramjam-python/src/lz4.rs | 45 +- cramjam-python/tests/test_variants.py | 4 - libcramjam/Cargo.lock | 930 ++++++++++++++++++++++++ libcramjam/Cargo.toml | 30 +- libcramjam/LICENSE | 1 + libcramjam/README.md | 7 + libcramjam/cbindgen.toml | 4 + libcramjam/src/brotli.rs | 9 + libcramjam/src/capi.rs | 926 +++++++++++++++++++++++ libcramjam/src/deflate.rs | 7 + libcramjam/src/gzip.rs | 9 + libcramjam/src/lib.rs | 3 + libcramjam/src/lz4.rs | 191 ++++- libcramjam/src/zstd.rs | 6 + 18 files changed, 2708 insertions(+), 78 deletions(-) create mode 100644 libcramjam/Cargo.lock create mode 120000 libcramjam/LICENSE create mode 100644 libcramjam/cbindgen.toml create mode 100644 libcramjam/src/capi.rs diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 75e79f04..d1baf451 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -155,7 +155,7 @@ jobs: - name: Build run: cargo build --release - name: Tests - run: cargo test --no-default-features --release + run: cargo test --no-default-features --release --features capi - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} diff --git a/Cargo.lock b/Cargo.lock index 00e4e754..f0dab373 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,15 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + [[package]] name = "alloc-no-stdlib" version = "2.0.4" @@ -72,6 +81,31 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "assert_cmd" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c98233c6673d8601ab23e77eb38f999c51100d46c5703b17288c57fddf3a1ffe" +dependencies = [ + "bstr", + "doc-comment", + "predicates", + "predicates-core", + "predicates-tree", + "wait-timeout", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -84,6 +118,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + [[package]] name = "brotli" version = "3.3.4" @@ -105,6 +145,17 @@ dependencies = [ "alloc-stdlib", ] +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata 0.1.10", +] + [[package]] name = "bytesize" version = "1.2.0" @@ -132,6 +183,25 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "cbindgen" +version = "0.24.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b922faaf31122819ec80c4047cc684c6979a087366c069611e33649bf98e18d" +dependencies = [ + "clap 3.2.25", + "heck", + "indexmap", + "log", + "proc-macro2", + "quote", + 
"serde", + "serde_json", + "syn 1.0.109", + "tempfile", + "toml", +] + [[package]] name = "cc" version = "1.0.79" @@ -147,6 +217,21 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clap" +version = "3.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" +dependencies = [ + "atty", + "bitflags 1.3.2", + "clap_lex 0.2.4", + "indexmap", + "strsim", + "termcolor", + "textwrap", +] + [[package]] name = "clap" version = "4.2.7" @@ -166,8 +251,8 @@ checksum = "914c8c79fb560f238ef6429439a30023c862f7a28e688c58f7203f12b29970bd" dependencies = [ "anstream", "anstyle", - "bitflags", - "clap_lex", + "bitflags 1.3.2", + "clap_lex 0.4.1", "strsim", ] @@ -180,7 +265,16 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.39", +] + +[[package]] +name = "clap_lex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +dependencies = [ + "os_str_bytes", ] [[package]] @@ -200,7 +294,7 @@ name = "cramjam-cli" version = "0.1.1" dependencies = [ "bytesize", - "clap", + "clap 4.2.7", "libcramjam", ] @@ -221,6 +315,24 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + 
[[package]] name = "errno" version = "0.3.1" @@ -242,6 +354,12 @@ dependencies = [ "libc", ] +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + [[package]] name = "flate2" version = "1.0.26" @@ -252,31 +370,93 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "heck" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown", +] + [[package]] name = "indoc" version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" +[[package]] +name = "inline-c" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "340dd3d6102fa919bd20987024a6d84954c36ec691ac1efea37742ee983c8dd5" +dependencies = [ + "assert_cmd", + "cc", + "inline-c-macro", + "lazy_static", + "predicates", + "regex", + "rustc_version", + "target-lexicon 0.11.2", + "tempfile", +] + +[[package]] +name = "inline-c-macro" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17f5621ec7adacda881d7c2826c064f5c29c72fd44333f97df61b458a583ae15" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", +] + [[package]] name = "io-lifetimes" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.1", "libc", "windows-sys 0.48.0", ] @@ -287,12 +467,27 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.1", "io-lifetimes", - "rustix", + "rustix 0.37.25", "windows-sys 0.48.0", ] +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + [[package]] name = "jobserver" version = "0.1.26" @@ -302,22 +497,51 @@ dependencies = [ "libc", ] +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "libc" -version = "0.2.149" +version = "0.2.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" [[package]] name = "libcramjam" -version = "0.1.0" +version = "0.1.5" dependencies = [ "brotli", "bzip2", + "cbindgen", "flate2", + "inline-c", + "libc", + "libdeflater", "lz4", "snap", "zstd", + "zstd-safe 7.0.0", +] + +[[package]] +name = "libdeflate-sys" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67921a7f85100c1559efc3d1c7c472091b7da05f304b4bbd5356f075e97f1cc2" +dependencies = [ + "cc", +] + +[[package]] +name = "libdeflater" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a31b22f662350ec294b13859f935aea772ba7b2bc8776269f4a5627308eab7d" +dependencies = [ + "libdeflate-sys", ] [[package]] @@ -326,6 +550,12 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b64f40e5e03e0d54f03845c8197d0291253cdbedfb1cb46b13c2c117554a9f4c" +[[package]] +name = "linux-raw-sys" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" + [[package]] name = "lock_api" version = "0.4.9" @@ -336,6 +566,12 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + [[package]] name = "lz4" version = "1.24.0" @@ -356,6 +592,12 @@ dependencies = [ "libc", ] +[[package]] +name = "memchr" +version = "2.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" + [[package]] name = "memoffset" version = "0.9.0" @@ -374,12 +616,33 @@ dependencies = [ "adler", ] +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +[[package]] +name = "os_str_bytes" +version = "6.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" + [[package]] name = "parking_lot" version = "0.12.1" @@ -398,22 +661,63 @@ checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.2.16", "smallvec", "windows-sys 0.45.0", ] +[[package]] +name = "pest" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae9cee2a55a544be8b89dc6848072af97a20f2422603c10865be2a42b580fff5" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + [[package]] name = "pkg-config" version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +[[package]] +name = "predicates" +version = "2.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59230a63c37f3e18569bdb90e4a89cbf5bf8b06fea0b84e65ea10cc4df47addd" +dependencies = [ + "difflib", + "float-cmp", + "itertools", + "normalize-line-endings", + "predicates-core", + "regex", +] + +[[package]] +name = "predicates-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" + +[[package]] +name = "predicates-tree" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf" +dependencies = [ + "predicates-core", + "termtree", +] + [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" dependencies = [ "unicode-ident", ] @@ -442,7 +746,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a96fe70b176a89cff78f2fa7b3c930081e163d5379b4dcdf993e3ae29ca662e5" dependencies = [ "once_cell", - "target-lexicon", + "target-lexicon 0.12.7", ] [[package]] @@ -464,7 +768,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn", + "syn 2.0.39", ] [[package]] @@ -476,14 +780,14 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.39", ] [[package]] name = "quote" -version = "1.0.26" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] @@ -494,7 +798,60 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "bitflags", + "bitflags 1.3.2", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "regex" +version = "1.10.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.3", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "regex-automata" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "rustc_version" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee" +dependencies = [ + "semver", ] [[package]] @@ -503,20 +860,88 @@ version = "0.37.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4eb579851244c2c03e7c24f501c3432bed80b8f720af1d6e5b0e0f01555a035" dependencies = [ - "bitflags", + "bitflags 1.3.2", "errno", "io-lifetimes", "libc", - "linux-raw-sys", + "linux-raw-sys 0.3.6", "windows-sys 0.48.0", ] +[[package]] +name = "rustix" +version = "0.38.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e" +dependencies = [ + "bitflags 2.4.1", + "errno", + "libc", + "linux-raw-sys 0.4.11", + "windows-sys 0.48.0", +] + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + [[package]] name = "scopeguard" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "semver" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7" +dependencies = [ + "pest", +] + +[[package]] +name = "serde" +version = "1.0.193" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.193" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", +] + +[[package]] +name = "serde_json" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + [[package]] name = "smallvec" version = "1.10.0" @@ -537,21 +962,107 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "2.0.15" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] 
+[[package]] +name = "syn" +version = "2.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "422045212ea98508ae3d28025bc5aaa2bd4a9cdaecd442a08da2ee620ee9ea95" + [[package]] name = "target-lexicon" version = "0.12.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd1ba337640d60c3e96bc6f0638a939b9c9a7f2c316a1598c279828b3d1dc8c5" +[[package]] +name = "tempfile" +version = "3.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" +dependencies = [ + "cfg-if", + "fastrand", + "redox_syscall 0.4.1", + "rustix 0.38.25", + "windows-sys 0.48.0", +] + +[[package]] +name = "termcolor" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "termtree" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" + +[[package]] +name = "textwrap" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" + +[[package]] +name = "thiserror" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", +] + +[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + [[package]] name = "unicode-ident" version = "1.0.8" @@ -570,6 +1081,46 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "wait-timeout" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +dependencies = [ + "libc", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = 
"windows-sys" version = "0.45.0" @@ -708,7 +1259,7 @@ version = "0.11.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" dependencies = [ - "zstd-safe", + "zstd-safe 5.0.2+zstd.1.5.2", ] [[package]] @@ -721,6 +1272,15 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" +dependencies = [ + "zstd-sys", +] + [[package]] name = "zstd-sys" version = "2.0.8+zstd.1.5.5" diff --git a/Cargo.toml b/Cargo.toml index 636808b0..a150f665 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ edition = "2021" homepage = "https://github.com/milesgranger/pyrus-cramjam" [profile.release] +# strip = true lto = "fat" codegen-units = 1 opt-level = 3 diff --git a/cramjam-python/benchmarks/test_bench.py b/cramjam-python/benchmarks/test_bench.py index 3bdcb7c3..a4edc154 100644 --- a/cramjam-python/benchmarks/test_bench.py +++ b/cramjam-python/benchmarks/test_bench.py @@ -211,7 +211,7 @@ def test_lz4_block(benchmark, file, use_cramjam: bool): [ f for f in FILES - if not (isinstance(f, (FiftyFourMbRandom, FiftyFourMbRepeating)) + if not (isinstance(f, (FiftyFourMbRandom, FiftyFourMbRepeating))) ], ids=lambda val: val.name, ) @@ -285,7 +285,6 @@ def test_bzip2(benchmark, file, use_cramjam: bool): @profile def memory_profile(): - import snappy data = bytearray(FILES[-1].read_bytes()) diff --git a/cramjam-python/src/lz4.rs b/cramjam-python/src/lz4.rs index ba047005..6a70377b 100644 --- a/cramjam-python/src/lz4.rs +++ b/cramjam-python/src/lz4.rs @@ -2,7 +2,6 @@ use crate::exceptions::{CompressionError, DecompressionError}; use crate::io::{AsBytes, RustyBuffer}; use crate::BytesType; -use libcramjam::lz4::lz4::{block, block::CompressionMode}; use pyo3::prelude::*; use pyo3::wrap_pyfunction; use pyo3::PyResult; 
@@ -76,7 +75,7 @@ pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> P /// /// `output_len` is optional, it's the upper bound length of decompressed data; if it's not provided, /// then it's assumed `store_size=True` was used during compression and length will then be taken -/// from the header. +/// from the header, otherwise it's assumed `store_size=False` was used and no prepended size exists in input /// /// Python Example /// -------------- @@ -84,11 +83,21 @@ pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> P /// >>> cramjam.lz4.decompress_block(compressed_bytes, output_len=Optional[int]) /// ``` #[pyfunction] +#[allow(unused_variables)] pub fn decompress_block(py: Python, data: BytesType, output_len: Option) -> PyResult { let bytes = data.as_bytes(); - py.allow_threads(|| block::decompress(bytes, output_len.map(|v| v as i32))) + + py.allow_threads(|| { + match output_len { + Some(n) => { + let mut buf = vec![0u8; n]; + libcramjam::lz4::block::decompress_into(bytes, &mut buf, Some(false)).map(|_| buf) + } + None => libcramjam::lz4::block::decompress_vec(bytes), + } .map_err(DecompressionError::from_err) .map(RustyBuffer::from) + }) } /// LZ4 _block_ compression. 
@@ -120,9 +129,7 @@ pub fn compress_block( ) -> PyResult { let bytes = data.as_bytes(); py.allow_threads(|| { - let store_size = store_size.unwrap_or(true); - let mode = compression_mode(mode, compression, acceleration)?; - block::compress(bytes, Some(mode), store_size) + libcramjam::lz4::block::compress_vec(bytes, compression.map(|v| v as _), acceleration, store_size) }) .map_err(CompressionError::from_err) .map(RustyBuffer::from) @@ -139,7 +146,7 @@ pub fn compress_block( pub fn decompress_block_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { let bytes = input.as_bytes(); let out_bytes = output.as_bytes_mut(); - py.allow_threads(|| block::decompress_to_buffer(bytes, None, out_bytes)) + py.allow_threads(|| libcramjam::lz4::block::decompress_into(bytes, out_bytes, Some(true))) .map_err(DecompressionError::from_err) .map(|v| v as _) } @@ -174,32 +181,12 @@ pub fn compress_block_into( let bytes = data.as_bytes(); let out_bytes = output.as_bytes_mut(); py.allow_threads(|| { - let store_size = store_size.unwrap_or(true); - let mode = compression_mode(mode, compression, acceleration)?; - block::compress_to_buffer(bytes, Some(mode), store_size, out_bytes) + libcramjam::lz4::block::compress_into(bytes, out_bytes, compression.map(|v| v as _), acceleration, store_size) }) .map_err(CompressionError::from_err) .map(|v| v as _) } -#[inline] -fn compression_mode( - mode: Option<&str>, - compression: Option, - acceleration: Option, -) -> PyResult { - let m = match mode { - Some(m) => match m { - "default" => CompressionMode::DEFAULT, - "fast" => CompressionMode::FAST(acceleration.unwrap_or(1)), - "high_compression" => CompressionMode::HIGHCOMPRESSION(compression.unwrap_or(9)), - _ => return Err(DecompressionError::new_err(format!("Unrecognized mode '{}'", m))), - }, - None => CompressionMode::DEFAULT, - }; - Ok(m) -} - /// Determine the size of a buffer which is guaranteed to hold the result of block compression, will error if /// data is too long to be 
compressed by LZ4. /// @@ -210,7 +197,7 @@ fn compression_mode( /// ``` #[pyfunction] pub fn compress_block_bound(src: BytesType) -> PyResult { - block::compress_bound(src.len()).map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) + Ok(libcramjam::lz4::block::compress_bound(src.len(), Some(true))) } /// lz4 Compressor object for streaming compression diff --git a/cramjam-python/tests/test_variants.py b/cramjam-python/tests/test_variants.py index 4ee4ca3c..22c5afed 100644 --- a/cramjam-python/tests/test_variants.py +++ b/cramjam-python/tests/test_variants.py @@ -45,14 +45,12 @@ def test_variants_different_dtypes(variant_str, arr): compressed = variant.compress(arr) decompressed = variant.decompress(compressed) assert same_same(bytes(decompressed), arr.tobytes()) - @pytest.mark.parametrize("is_bytearray", (True, False)) @pytest.mark.parametrize("variant_str", VARIANTS) @given(uncompressed=st.binary(min_size=1)) def test_variants_simple(variant_str, is_bytearray, uncompressed: bytes): - variant = getattr(cramjam, variant_str) if is_bytearray: @@ -264,7 +262,6 @@ def test_dunders(Obj, tmp_path_factory, data): ), ) def test_lz4_block(compress_kwargs): - from cramjam import lz4 data = b"howdy neighbor" @@ -287,7 +284,6 @@ def test_lz4_block(compress_kwargs): @given(first=st.binary(), second=st.binary()) def test_gzip_multiple_streams(first: bytes, second: bytes): - out1 = gzip.compress(first) out2 = gzip.compress(second) assert gzip.decompress(out1 + out2) == first + second diff --git a/libcramjam/Cargo.lock b/libcramjam/Cargo.lock new file mode 100644 index 00000000..267e5826 --- /dev/null +++ b/libcramjam/Cargo.lock @@ -0,0 +1,930 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "assert_cmd" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c98233c6673d8601ab23e77eb38f999c51100d46c5703b17288c57fddf3a1ffe" +dependencies = [ + "bstr", + "doc-comment", + "predicates", + "predicates-core", + "predicates-tree", + "wait-timeout", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + +[[package]] +name = "brotli" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata 0.1.10", +] + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "cbindgen" +version = "0.24.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b922faaf31122819ec80c4047cc684c6979a087366c069611e33649bf98e18d" +dependencies = [ + "clap", + "heck", + "indexmap", + "log", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn 1.0.109", + "tempfile", + "toml", +] + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "jobserver", + "libc", +] + 
+[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "3.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" +dependencies = [ + "atty", + "bitflags 1.3.2", + "clap_lex", + "indexmap", + "strsim", + "termcolor", + "textwrap", +] + +[[package]] +name = "clap_lex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +dependencies = [ + "os_str_bytes", +] + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "inline-c" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "340dd3d6102fa919bd20987024a6d84954c36ec691ac1efea37742ee983c8dd5" +dependencies = [ + "assert_cmd", + "cc", + "inline-c-macro", + "lazy_static", + "predicates", + "regex", + "rustc_version", + "target-lexicon", + "tempfile", +] + +[[package]] +name = "inline-c-macro" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17f5621ec7adacda881d7c2826c064f5c29c72fd44333f97df61b458a583ae15" 
+dependencies = [ + "proc-macro2", + "quote", + "rustc_version", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "jobserver" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +dependencies = [ + "libc", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" + +[[package]] +name = "libcramjam" +version = "0.1.1" +dependencies = [ + "brotli", + "bzip2", + "cbindgen", + "flate2", + "inline-c", + "libc", + "lz4", + "snap", + "zstd", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "lz4" +version = "1.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" +dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.9.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "memchr" +version = "2.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "os_str_bytes" +version = "6.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" + +[[package]] +name = "pest" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae9cee2a55a544be8b89dc6848072af97a20f2422603c10865be2a42b580fff5" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pkg-config" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" + +[[package]] +name = "predicates" +version = "2.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59230a63c37f3e18569bdb90e4a89cbf5bf8b06fea0b84e65ea10cc4df47addd" +dependencies = [ + "difflib", + "float-cmp", + "itertools", + "normalize-line-endings", + 
"predicates-core", + "regex", +] + +[[package]] +name = "predicates-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" + +[[package]] +name = "predicates-tree" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf" +dependencies = [ + "predicates-core", + "termtree", +] + +[[package]] +name = "proc-macro2" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "regex" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.4.3", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "regex-automata" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + 
+[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "rustc_version" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "0.38.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9470c4bf8246c8daf25f9598dca807fb6510347b1e1cfa55749113850c79d88a" +dependencies = [ + "bitflags 2.4.1", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + +[[package]] +name = "semver" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7" +dependencies = [ + "pest", +] + +[[package]] +name = "serde" +version = "1.0.193" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.193" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", +] + +[[package]] +name = "serde_json" +version = "1.0.108" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "422045212ea98508ae3d28025bc5aaa2bd4a9cdaecd442a08da2ee620ee9ea95" + +[[package]] +name = "tempfile" +version = "3.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" +dependencies = [ + "cfg-if", + "fastrand", + "redox_syscall", + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "termcolor" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff1bc3d3f05aff0403e8ac0d92ced918ec05b666a43f83297ccef5bea8a3d449" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "termtree" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" + +[[package]] +name = "textwrap" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" + +[[package]] +name = "thiserror" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", +] + +[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "wait-timeout" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +dependencies = [ + "libc", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 
0.52.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + +[[package]] +name = "zstd" +version = "0.11.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "5.0.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.9+zstd.1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/libcramjam/Cargo.toml b/libcramjam/Cargo.toml index f85e86bd..f7fc1900 100644 --- a/libcramjam/Cargo.toml +++ b/libcramjam/Cargo.toml @@ -1,14 +1,40 @@ [package] name = "libcramjam" -version = "0.1.0" +version = "0.1.5" edition = "2021" +license = "MIT" 
+description = "Compression library combining a plethora of algorithms in a similar as possible API" +readme = "README.md" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +default = [] +capi = ["libc"] [dependencies] snap = "^1" -brotli = { version = "^3", default-features = false, features = ["std"] } +brotli = { version = "^3", default-features = false, features = ["std", "ffi-api"] } bzip2 = "^0.4" lz4 = "^1" flate2 = "^1" +libdeflater = "^1" zstd = "0.11.1+zstd.1.5.2" +zstd-safe = "7.0.0" # NOTE: This is the same dep version as zstd, as they don't re-export +libc = { version = "0.2", optional = true } + +[build-dependencies] +cbindgen = "^0.24" + +[dev-dependencies] +inline-c = "0.1" + +[package.metadata.capi.pkg_config] +strip_include_path_components = 1 + +[package.metadata.capi.library] +rustflags = "-Cpanic=abort" +name = "cramjam" + +[package.metadata.capi.header] +name = "cramjam" +subdirectory = "cramjam" diff --git a/libcramjam/LICENSE b/libcramjam/LICENSE new file mode 120000 index 00000000..ea5b6064 --- /dev/null +++ b/libcramjam/LICENSE @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/libcramjam/README.md b/libcramjam/README.md index ff77dc07..a796b6d8 100644 --- a/libcramjam/README.md +++ b/libcramjam/README.md @@ -2,3 +2,10 @@ # cramjam library A Rust library combining different compression algorithms/libraries in a common (as possible) API. + + +Features: + +- `capi`: Build a C-ABI library. 
Compatible with [`cargo-c`](https://github.com/lu-zero/cargo-c) + +Pre-compiled libraries available on [![Anaconda-Server Badge](https://anaconda.org/conda-forge/libcramjam/badges/version.svg)](https://anaconda.org/conda-forge/libcramjam) diff --git a/libcramjam/cbindgen.toml b/libcramjam/cbindgen.toml new file mode 100644 index 00000000..38867ee0 --- /dev/null +++ b/libcramjam/cbindgen.toml @@ -0,0 +1,4 @@ +language = "C" +cpp_compat = true +include_version = true +namespace = "cramjam" diff --git a/libcramjam/src/brotli.rs b/libcramjam/src/brotli.rs index 6302bac1..f4d9734d 100644 --- a/libcramjam/src/brotli.rs +++ b/libcramjam/src/brotli.rs @@ -25,3 +25,12 @@ pub fn compress(input: R, output: &mut W, level: Opt let n_bytes = std::io::copy(&mut encoder, output)?; Ok(n_bytes as usize) } + +pub fn make_write_compressor(w: W, level: Option) -> brotli::CompressorWriter { + let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); + brotli::CompressorWriter::new(w, BUF_SIZE, level, LGWIN) +} + +pub fn compress_bound(input_len: usize) -> usize { + brotli::ffi::compressor::BrotliEncoderMaxCompressedSize(input_len) +} diff --git a/libcramjam/src/capi.rs b/libcramjam/src/capi.rs new file mode 100644 index 00000000..12dc2844 --- /dev/null +++ b/libcramjam/src/capi.rs @@ -0,0 +1,926 @@ +use libc::c_void; + +use std::ffi::{c_char, CString}; +use std::io::Cursor; +use std::io::Write; +use std::slice; + +use crate::{brotli, bzip2, deflate, gzip, lz4, snappy, zstd}; + +#[repr(C)] +pub struct Buffer { + data: *const u8, + len: usize, + owned: bool, +} + +impl Buffer { + pub fn empty() -> Self { + Buffer { + data: std::ptr::null(), + len: 0, + owned: false, + } + } +} + +impl From<&Vec> for Buffer { + fn from(v: &Vec) -> Self { + Buffer { + data: v.as_ptr(), + len: v.len(), + owned: false, + } + } +} +impl From> for Buffer { + fn from(mut v: Vec) -> Self { + v.shrink_to_fit(); + let buffer = Buffer { + data: v.as_ptr(), + len: v.len(), + owned: true, + }; + 
std::mem::forget(v); + buffer + } +} + +/// All codecs supported by the de/compress and de/compress_into APIs +#[derive(Debug, Copy, Clone)] +#[repr(C)] +pub enum Codec { + #[allow(dead_code)] + Snappy, + #[allow(dead_code)] + SnappyRaw, + #[allow(dead_code)] + Bzip2, + #[allow(dead_code)] + Lz4, + #[allow(dead_code)] + Lz4Block, + #[allow(dead_code)] + Zstd, + #[allow(dead_code)] + Gzip, + #[allow(dead_code)] + Brotli, +} + +/// Streaming only codecs, which can create De/Compressors using the de/compressor APIs +#[derive(Debug)] +#[repr(C)] +pub enum StreamingCodec { + #[allow(dead_code)] + StreamingBzip2, + #[allow(dead_code)] + StreamingSnappy, + #[allow(dead_code)] + StreamingLz4, + #[allow(dead_code)] + StreamingZstd, + #[allow(dead_code)] + StreamingGzip, + #[allow(dead_code)] + StreamingBrotli, +} + +type SnappyFrameCompressor = snappy::snap::write::FrameEncoder>; +type Bzip2Compressor = bzip2::bzip2::write::BzEncoder>; +type Lz4Compressor = crate::lz4::lz4::Encoder>; +type GzipCompressor = crate::gzip::flate2::write::GzEncoder>; +type BrotliCompressor = brotli::brotli::CompressorWriter>; +type ZstdCompressor<'a> = crate::zstd::zstd::Encoder<'a, Vec>; + +type Decompressor = Cursor>; + +// Set the error string to a error message pointer +#[inline(always)] +fn error_to_ptr(err: impl ToString, ptr: &mut *mut c_char) { + let err_msg = CString::new(err.to_string()).unwrap(); + *ptr = err_msg.into_raw(); +} + +/// Safe to call on a nullptr +#[no_mangle] +pub extern "C" fn free_string(ptr: *mut c_char) { + if !ptr.is_null() { + let _ = unsafe { CString::from_raw(ptr) }; + } +} + +#[no_mangle] +pub extern "C" fn free_buffer(buf: Buffer) { + if !buf.data.is_null() && buf.owned { + let _ = unsafe { Vec::from_raw_parts(buf.data as *mut u8, buf.len, buf.len) }; + } +} + +#[no_mangle] +pub extern "C" fn decompress( + codec: Codec, + input: *const u8, + input_len: usize, + nbytes_read: &mut usize, + nbytes_written: &mut usize, + error: &mut *mut c_char, +) -> Buffer { + 
let mut decompressed = Cursor::new(vec![]); + let mut compressed = Cursor::new(unsafe { std::slice::from_raw_parts(input, input_len) }); + let ret = match codec { + Codec::Snappy => snappy::decompress(&mut compressed, &mut decompressed), + Codec::SnappyRaw => snappy::raw::decompress_vec(compressed.get_ref()).map(|v| { + let len = v.len(); + *decompressed.get_mut() = v; + decompressed.set_position(len as _); + compressed.set_position(input_len as _); // todo, assuming it read the whole thing + len + }), + Codec::Bzip2 => bzip2::decompress(&mut compressed, &mut decompressed), + Codec::Brotli => brotli::decompress(&mut compressed, &mut decompressed), + Codec::Gzip => gzip::decompress(&mut compressed, &mut decompressed), + Codec::Zstd => zstd::decompress(&mut compressed, &mut decompressed), + Codec::Lz4 => lz4::decompress(&mut compressed, &mut decompressed), + Codec::Lz4Block => lz4::block::decompress_vec(compressed.get_ref()).map(|v| { + let len = v.len(); + *decompressed.get_mut() = v; + decompressed.set_position(len as _); + compressed.set_position(input_len as _); // todo, assuming it read the whole thing + len + }), + }; + match ret { + Ok(n) => { + *nbytes_read = compressed.position() as usize; + *nbytes_written = n; + match decompressed.flush() { + Ok(_) => Buffer::from(decompressed.into_inner()), + Err(err) => { + error_to_ptr(err, error); + Buffer::empty() + } + } + } + Err(err) => { + error_to_ptr(err, error); + Buffer::empty() + } + } +} + +#[no_mangle] +pub extern "C" fn compress( + codec: Codec, + level: i32, + input: *const u8, + input_len: usize, + nbytes_read: &mut usize, + nbytes_written: &mut usize, + error: &mut *mut c_char, +) -> Buffer { + if level < 0 { + error_to_ptr("Requires compression >= 0", error); + return Buffer::empty(); + } + let level = Some(level as _); + let mut compressed = Cursor::new(vec![]); + let mut decompressed = Cursor::new(unsafe { std::slice::from_raw_parts(input, input_len) }); + let ret = match codec { + Codec::Snappy => 
snappy::compress(&mut decompressed, &mut compressed), + Codec::SnappyRaw => snappy::raw::compress_vec(decompressed.get_ref()).map(|v| { + let len = v.len(); + *compressed.get_mut() = v; + compressed.set_position(len as _); + decompressed.set_position(input_len as _); + len + }), + Codec::Bzip2 => bzip2::compress(&mut decompressed, &mut compressed, level), + Codec::Brotli => brotli::compress(&mut decompressed, &mut compressed, level), + Codec::Gzip => gzip::compress(&mut decompressed, &mut compressed, level), + Codec::Zstd => zstd::compress(&mut decompressed, &mut compressed, level.map(|v| v as i32)), + Codec::Lz4 => lz4::compress(&mut decompressed, &mut compressed, level), + // TODO: Support passing acceleration + Codec::Lz4Block => lz4::block::compress_vec(decompressed.get_ref(), level, None, Some(true)).map(|v| { + let len = v.len(); + *compressed.get_mut() = v; + compressed.set_position(len as _); + decompressed.set_position(input_len as _); + len + }), // TODO + }; + match ret { + Ok(n) => { + *nbytes_read = decompressed.get_ref().len(); + *nbytes_written = n; + match compressed.flush() { + Ok(_) => Buffer::from(compressed.into_inner()), + Err(err) => { + error_to_ptr(err, error); + Buffer::empty() + } + } + } + Err(err) => { + error_to_ptr(err, error); + Buffer::empty() + } + } +} + +#[no_mangle] +pub extern "C" fn decompress_into( + codec: Codec, + input: *const u8, + input_len: usize, + output: *mut u8, + output_len: usize, + nbytes_read: &mut usize, + nbytes_written: &mut usize, + error: &mut *mut c_char, +) { + let mut compressed = Cursor::new(unsafe { std::slice::from_raw_parts(input, input_len) }); + let mut decompressed = Cursor::new(unsafe { std::slice::from_raw_parts_mut(output, output_len) }); + + let ret = match codec { + Codec::Snappy => snappy::decompress(&mut compressed, &mut decompressed), + Codec::SnappyRaw => snappy::raw::decompress(compressed.get_ref(), decompressed.get_mut()), + Codec::Bzip2 => bzip2::decompress(&mut compressed, &mut 
decompressed), + Codec::Brotli => brotli::decompress(&mut compressed, &mut decompressed), + Codec::Gzip => gzip::decompress(&mut compressed, &mut decompressed), + Codec::Zstd => zstd::decompress(&mut compressed, &mut decompressed), + Codec::Lz4 => lz4::decompress(&mut compressed, &mut decompressed), + Codec::Lz4Block => lz4::block::decompress_into(&compressed.get_ref(), decompressed.get_mut(), None), + }; + match ret { + Ok(n) => { + *nbytes_written = n; + *nbytes_read = compressed.get_ref().len(); + } + Err(err) => { + error_to_ptr(err, error); + *nbytes_written = 0; + *nbytes_read = 0; + } + } +} + +/// Compress `input_len` bytes at `input` with `codec` into the caller-provided `output` buffer of `output_len` bytes. +/// +/// On success `nbytes_written` is the compressed size and `nbytes_read` is `input_len`. +/// On failure (including a negative `level`) both counters are zero and `error` points to a message to release with `free_string`. +#[no_mangle] +pub extern "C" fn compress_into( + codec: Codec, + level: i32, + input: *const u8, + input_len: usize, + output: *mut u8, + output_len: usize, + nbytes_read: &mut usize, + nbytes_written: &mut usize, + error: &mut *mut c_char, +) { + let mut decompressed = unsafe { std::slice::from_raw_parts(input, input_len) }; + let mut compressed = unsafe { std::slice::from_raw_parts_mut(output, output_len) }; + + if level < 0 { + error_to_ptr("Requires compression >= 0", error); + *nbytes_written = 0; + *nbytes_read = 0; + return; + } + let level = Some(level as _); + + let ret = match codec { + Codec::Snappy => snappy::compress(&mut decompressed, &mut compressed), + Codec::SnappyRaw => snappy::raw::compress(decompressed, &mut compressed), + Codec::Bzip2 => bzip2::compress(&mut decompressed, &mut compressed, level), + Codec::Brotli => brotli::compress(&mut decompressed, &mut compressed, level), + Codec::Gzip => gzip::compress(&mut decompressed, &mut compressed, level), + Codec::Zstd => zstd::compress(&mut decompressed, &mut compressed, level.map(|v| v as i32)), + Codec::Lz4 => lz4::compress(&mut decompressed, &mut compressed, level), + // TODO: Support passing acceleration + Codec::Lz4Block => lz4::block::compress_into(decompressed, compressed, level, None, Some(true)), + }; + match ret { + Ok(n) => { + *nbytes_written = n; + // the streaming codecs read through `&mut decompressed`, which advances the slice as it is consumed, + // so `decompressed.len()` would report what is left over; report the input length, as `compress` does + *nbytes_read = input_len; + } + Err(err) => { + error_to_ptr(err, error); +
*nbytes_written = 0; + *nbytes_read = 0; + } + } +} + +/* ---------- Streaming Compressor --------------- */ +/// Create a streaming compressor for `codec` and return it as an opaque pointer. +/// +/// When the level is rejected or the encoder cannot be built, a message is stored in `error` and a null pointer is returned. +#[no_mangle] +#[allow(unused_variables)] +pub extern "C" fn compressor_init(codec: StreamingCodec, level: i32, error: &mut *mut c_char) -> *mut c_void { + match codec { + StreamingCodec::StreamingBzip2 => { + if level < 0 { + error_to_ptr("Bzip2 requires compression level >= 0", error); + return std::ptr::null_mut(); + } + let compressor = bzip2::bzip2::write::BzEncoder::new(vec![], bzip2::bzip2::Compression::new(level as _)); + Box::into_raw(Box::new(compressor)) as _ + } + StreamingCodec::StreamingBrotli => { + if level < 0 { + error_to_ptr("Brotli requires compression level >= 0", error); + return std::ptr::null_mut(); + } + let compressor = brotli::make_write_compressor(vec![], Some(level as _)); + Box::into_raw(Box::new(compressor)) as _ + } + StreamingCodec::StreamingGzip => { + if level < 1 { + error_to_ptr("Gzip requires compression level >= 1", error); + return std::ptr::null_mut(); + } + let compressor = gzip::flate2::write::GzEncoder::new(vec![], gzip::flate2::Compression::new(level as _)); + Box::into_raw(Box::new(compressor)) as _ + } + StreamingCodec::StreamingZstd => { + // `Encoder::new` is fallible: box the encoder itself, never the `Result` wrapping it, + // because every other function casts this pointer to `*mut ZstdCompressor` + match zstd::zstd::Encoder::new(vec![], level) { + Ok(compressor) => Box::into_raw(Box::new(compressor)) as *mut c_void, + Err(err) => { + error_to_ptr(err, error); + std::ptr::null_mut() + } + } + } + StreamingCodec::StreamingSnappy => { + let compressor = snappy::snap::write::FrameEncoder::new(vec![]); + Box::into_raw(Box::new(compressor)) as _ + } + StreamingCodec::StreamingLz4 => { + if level < 0 { + error_to_ptr("Lz4 requires compression level >= 0", error); + return std::ptr::null_mut(); + } + // `make_write_compressor` returns a `Result`; unwrap it so the pointer really is a `*mut Lz4Compressor` + match lz4::make_write_compressor(vec![], Some(level as _)) { + Ok(compressor) => Box::into_raw(Box::new(compressor)) as *mut c_void, + Err(err) => { + error_to_ptr(err, error); + std::ptr::null_mut() + } + } + } + } +} + +#[no_mangle] +pub extern "C" fn free_compressor(codec: StreamingCodec, compressor_ptr: &mut *mut c_void) { + if !(*compressor_ptr).is_null() { + { + match codec { + StreamingCodec::StreamingBzip2 => { + let _ = unsafe {
Box::from_raw(*compressor_ptr as *mut Bzip2Compressor) }; + } + StreamingCodec::StreamingBrotli => { + let _ = unsafe { Box::from_raw(*compressor_ptr as *mut BrotliCompressor) }; + } + StreamingCodec::StreamingGzip => { + let _ = unsafe { Box::from_raw(*compressor_ptr as *mut GzipCompressor) }; + } + StreamingCodec::StreamingZstd => { + let _ = unsafe { Box::from_raw(*compressor_ptr as *mut ZstdCompressor) }; + } + StreamingCodec::StreamingSnappy => { + let _ = unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; + } + StreamingCodec::StreamingLz4 => { + let _ = unsafe { Box::from_raw(*compressor_ptr as *mut Lz4Compressor) }; + } + } + } + *compressor_ptr = std::ptr::null_mut(); + } +} + +#[no_mangle] +pub extern "C" fn compressor_inner(codec: StreamingCodec, compressor_ptr: &mut *mut c_void) -> Buffer { + match codec { + StreamingCodec::StreamingBzip2 => { + let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Bzip2Compressor) }; + let buffer = Buffer::from(compressor.get_ref()); + *compressor_ptr = Box::into_raw(compressor) as _; + buffer + } + StreamingCodec::StreamingBrotli => { + let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut BrotliCompressor) }; + let buffer = Buffer::from(compressor.get_ref()); + *compressor_ptr = Box::into_raw(compressor) as _; + buffer + } + StreamingCodec::StreamingGzip => { + let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut GzipCompressor) }; + let buffer = Buffer::from(compressor.get_ref()); + *compressor_ptr = Box::into_raw(compressor) as _; + buffer + } + StreamingCodec::StreamingZstd => { + let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut ZstdCompressor) }; + let buffer = Buffer::from(compressor.get_ref()); + *compressor_ptr = Box::into_raw(compressor) as _; + buffer + } + StreamingCodec::StreamingSnappy => { + let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; + let buffer = Buffer::from(compressor.get_ref()); + 
*compressor_ptr = Box::into_raw(compressor) as _; + buffer + } + StreamingCodec::StreamingLz4 => { + let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Lz4Compressor) }; + let buffer = Buffer::from(compressor.writer()); + *compressor_ptr = Box::into_raw(compressor) as _; + buffer + } + } +} + +/// Finish the compression stream and return the underlying buffer, transferring ownership to the caller. +/// +/// The compressor is consumed by this call. On failure a message is stored in `error` and an empty buffer is returned. +#[no_mangle] +pub extern "C" fn compressor_finish( + codec: StreamingCodec, + compressor_ptr: &mut *mut c_void, + error: &mut *mut c_char, +) -> Buffer { + let buf = match codec { + StreamingCodec::StreamingBzip2 => { + let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Bzip2Compressor) }; + match compressor.finish() { + Ok(buf) => Buffer::from(buf), + Err(err) => { + error_to_ptr(err, error); + Buffer::empty() + } + } + } + StreamingCodec::StreamingBrotli => { + let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut BrotliCompressor) }; + // no early return on a failed flush: the box is dropped either way, so control must still + // reach the statement after this match that resets `compressor_ptr` to null + match compressor.flush() { + Ok(_) => Buffer::from(compressor.into_inner()), + Err(err) => { + error_to_ptr(err, error); + Buffer::empty() + } + } + } + StreamingCodec::StreamingGzip => { + let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut GzipCompressor) }; + match compressor.finish() { + Ok(buf) => Buffer::from(buf), + Err(err) => { + error_to_ptr(err, error); + Buffer::empty() + } + } + } + StreamingCodec::StreamingZstd => { + let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut ZstdCompressor) }; + match compressor.finish() { + Ok(buf) => Buffer::from(buf), + Err(err) => { + error_to_ptr(err, error); + Buffer::empty() + } + } + } + StreamingCodec::StreamingSnappy => { + let compressor = unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; + match compressor.into_inner() { + Ok(buf) => Buffer::from(buf), + Err(err) => { + error_to_ptr(err, error); + Buffer::empty() + } + } + } + StreamingCodec::StreamingLz4 => { + let compressor = unsafe {
Box::from_raw(*compressor_ptr as *mut Lz4Compressor) }; + let (w, ret) = compressor.finish(); + match ret { + Ok(_) => Buffer::from(w), + Err(err) => { + error_to_ptr(err, error); + Buffer::empty() + } + } + } + }; + *compressor_ptr = std::ptr::null_mut(); + buf +} + +#[no_mangle] +pub extern "C" fn compressor_flush(codec: StreamingCodec, compressor_ptr: &mut *mut c_void, error: &mut *mut c_char) { + match codec { + StreamingCodec::StreamingBzip2 => { + let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Bzip2Compressor) }; + if let Err(err) = compressor.flush() { + error_to_ptr(err, error); + } + *compressor_ptr = Box::into_raw(compressor) as _; + } + StreamingCodec::StreamingBrotli => { + let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut BrotliCompressor) }; + if let Err(err) = compressor.flush() { + error_to_ptr(err, error); + } + *compressor_ptr = Box::into_raw(compressor) as _; + } + StreamingCodec::StreamingGzip => { + let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut GzipCompressor) }; + if let Err(err) = compressor.flush() { + error_to_ptr(err, error); + } + *compressor_ptr = Box::into_raw(compressor) as _; + } + StreamingCodec::StreamingZstd => { + let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut ZstdCompressor) }; + if let Err(err) = compressor.flush() { + error_to_ptr(err, error); + } + *compressor_ptr = Box::into_raw(compressor) as _; + } + StreamingCodec::StreamingSnappy => { + let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut SnappyFrameCompressor) }; + if let Err(err) = compressor.flush() { + error_to_ptr(err, error); + } + *compressor_ptr = Box::into_raw(compressor) as _; + } + StreamingCodec::StreamingLz4 => { + let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Lz4Compressor) }; + if let Err(err) = compressor.flush() { + error_to_ptr(err, error); + } + *compressor_ptr = Box::into_raw(compressor) as _; + } + } +} + +#[no_mangle] +pub extern "C" 
fn compressor_compress( + codec: StreamingCodec, + compressor_ptr: &mut *mut c_void, + input: *const u8, + input_len: usize, + nbytes_read: &mut usize, + nbytes_written: &mut usize, + error: &mut *mut c_char, +) { + let mut decompressed = Cursor::new(unsafe { slice::from_raw_parts(input, input_len) }); + match codec { + StreamingCodec::StreamingBzip2 => { + let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Bzip2Compressor) }; + match std::io::copy(&mut decompressed, &mut compressor) { + Ok(n) => { + *nbytes_written = n as _; + *nbytes_read = decompressed.position() as _; + } + Err(err) => { + error_to_ptr(err, error); + } + } + *compressor_ptr = Box::into_raw(compressor) as _; + } + StreamingCodec::StreamingBrotli => { + let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut BrotliCompressor) }; + match std::io::copy(&mut decompressed, &mut compressor) { + Ok(n) => { + *nbytes_written = n as _; + *nbytes_read = decompressed.position() as _; + } + Err(err) => { + error_to_ptr(err, error); + } + } + *compressor_ptr = Box::into_raw(compressor) as _; + } + StreamingCodec::StreamingGzip => { + let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut GzipCompressor) }; + match std::io::copy(&mut decompressed, &mut compressor) { + Ok(n) => { + *nbytes_written = n as _; + *nbytes_read = decompressed.position() as _; + } + Err(err) => { + error_to_ptr(err, error); + } + } + *compressor_ptr = Box::into_raw(compressor) as _; + } + StreamingCodec::StreamingZstd => { + let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut ZstdCompressor) }; + match std::io::copy(&mut decompressed, &mut compressor) { + Ok(n) => { + *nbytes_written = n as _; + *nbytes_read = decompressed.position() as _; + } + Err(err) => { + error_to_ptr(err, error); + } + } + *compressor_ptr = Box::into_raw(compressor) as _; + } + StreamingCodec::StreamingSnappy => { + let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut 
SnappyFrameCompressor) }; + match std::io::copy(&mut decompressed, &mut compressor) { + Ok(n) => { + *nbytes_written = n as _; + *nbytes_read = decompressed.position() as _; + } + Err(err) => { + error_to_ptr(err, error); + } + } + *compressor_ptr = Box::into_raw(compressor) as _; + } + StreamingCodec::StreamingLz4 => { + let mut compressor = unsafe { Box::from_raw(*compressor_ptr as *mut Lz4Compressor) }; + match std::io::copy(&mut decompressed, &mut compressor) { + Ok(n) => { + *nbytes_written = n as _; + *nbytes_read = decompressed.position() as _; + } + Err(err) => { + error_to_ptr(err, error); + } + } + *compressor_ptr = Box::into_raw(compressor) as _; + } + } +} +#[no_mangle] +#[allow(unused_variables)] +pub extern "C" fn decompressor_init(codec: StreamingCodec) -> *mut c_void { + // for decompression, we really only need a buffer for storing output + // some streaming codecs, like snappy, don't have a write impl and only a + // read impl for decompressors + let buf: Vec = vec![]; + Box::into_raw(Box::new(Cursor::new(buf))) as _ +} + +#[no_mangle] +#[allow(unused_variables)] +pub extern "C" fn free_decompressor(codec: StreamingCodec, decompressor_ptr: &mut *mut c_void) { + if !(*decompressor_ptr).is_null() { + { + let _ = unsafe { Box::from_raw(*decompressor_ptr as *mut Decompressor) }; + } + *decompressor_ptr = std::ptr::null_mut(); + } +} + +#[no_mangle] +#[allow(unused_variables)] +pub extern "C" fn decompressor_inner(codec: StreamingCodec, decompressor_ptr: &mut *mut c_void) -> Buffer { + let decompressor = unsafe { Box::from_raw(*decompressor_ptr as *mut Decompressor) }; + let buf = Buffer::from(decompressor.get_ref()); + *decompressor_ptr = Box::into_raw(decompressor) as _; + buf +} + +/// Finish the decompression stream and return the underlying buffer, transferring ownership to the caller. +/// +/// The decompressor is consumed and `decompressor_ptr` is set to null, whether or not the final flush succeeds. +/// On failure a message is stored in `error` and an empty buffer is returned. +#[no_mangle] +#[allow(unused_variables)] +pub extern "C" fn decompressor_finish( + codec: StreamingCodec, + decompressor_ptr: &mut *mut c_void, + error: &mut *mut
c_char, +) -> Buffer { + let mut cursor = unsafe { Box::from_raw(*decompressor_ptr as *mut Decompressor) }; + // ownership now lives in `cursor`, which is dropped or handed out on every path below; + // clear the caller's pointer first so a failed flush cannot leave it dangling + *decompressor_ptr = std::ptr::null_mut(); + if let Err(err) = cursor.flush() { + error_to_ptr(err, error); + return Buffer::empty(); + }; + Buffer::from(cursor.into_inner()) +} + +#[no_mangle] +#[allow(unused_variables)] +pub extern "C" fn decompressor_flush( + codec: StreamingCodec, + decompressor_ptr: &mut *mut c_void, + error: &mut *mut c_char, +) { + let mut cursor = unsafe { Box::from_raw(*decompressor_ptr as *mut Decompressor) }; + if let Err(err) = cursor.flush() { + error_to_ptr(err, error); + } + *decompressor_ptr = Box::into_raw(cursor) as _; +} + +#[no_mangle] +pub extern "C" fn decompressor_decompress( + codec: StreamingCodec, + decompressor_ptr: &mut *mut c_void, + input: *const u8, + input_len: usize, + nbytes_read: &mut usize, + nbytes_written: &mut usize, + error: &mut *mut c_char, +) { + let mut decompressed = unsafe { Box::from_raw(*decompressor_ptr as *mut Decompressor) }; + let start_pos = decompressed.position(); + let mut compressed = Cursor::new(unsafe { std::slice::from_raw_parts(input, input_len) }); + let ret = match codec { + StreamingCodec::StreamingBzip2 => bzip2::decompress(&mut compressed, &mut decompressed), + StreamingCodec::StreamingGzip => gzip::decompress(&mut compressed, &mut decompressed), + StreamingCodec::StreamingBrotli => brotli::decompress(&mut compressed, &mut decompressed), + StreamingCodec::StreamingZstd => zstd::decompress(&mut compressed, &mut decompressed), + StreamingCodec::StreamingSnappy => snappy::decompress(&mut compressed, &mut decompressed), + StreamingCodec::StreamingLz4 => lz4::decompress(&mut compressed, &mut decompressed), + }; + match ret { + Ok(_) => { + *nbytes_read = compressed.position() as _; + *nbytes_written = (decompressed.position() - start_pos) as _; + } + Err(err) => { + error_to_ptr(err, error); + } + }; + *decompressor_ptr = Box::into_raw(decompressed) as _; +} + +/* -------- Codec
specific functions ----------*/ +#[no_mangle] +pub extern "C" fn lz4_frame_max_compression_level() -> usize { + lz4::LZ4_ACCELERATION_MAX as _ +} + +#[no_mangle] +pub extern "C" fn lz4_frame_max_compressed_len(input_len: usize, compression_level: i32) -> usize { + lz4::compress_bound(input_len, Some(compression_level as _)) +} + +#[no_mangle] +#[allow(unused_variables)] +pub extern "C" fn lz4_block_max_compressed_len(input_len: usize, error: &mut *mut c_char) -> usize { + lz4::block::compress_bound(input_len, Some(true)) +} + +#[no_mangle] +pub extern "C" fn deflate_max_compressed_len(input_len: usize, level: i32) -> usize { + deflate::compress_bound(input_len, Some(level)) +} + +#[no_mangle] +pub extern "C" fn gzip_max_compressed_len(input_len: usize, level: i32) -> usize { + let level = if level < 0 { 0 } else { level }; + gzip::compress_bound(input_len, Some(level)).unwrap() +} + +#[no_mangle] +pub extern "C" fn zstd_max_compressed_len(input_len: usize) -> usize { + zstd::compress_bound(input_len) +} + +#[no_mangle] +pub extern "C" fn snappy_raw_max_compressed_len(input_len: usize) -> usize { + snap::raw::max_compress_len(input_len) +} + +#[no_mangle] +pub extern "C" fn brotli_max_compressed_len(input_len: usize) -> usize { + brotli::compress_bound(input_len) +} + +#[no_mangle] +pub extern "C" fn snappy_raw_decompressed_len(input: *const u8, input_len: usize, error: &mut *mut c_char) -> isize { + let input = unsafe { slice::from_raw_parts(input, input_len) }; + match snap::raw::decompress_len(input) { + Ok(n) => n as _, + Err(err) => { + error_to_ptr(err, error); + -1 + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const DATA: &[u8; 5] = b"bytes"; + + #[test] + fn test_lz4_frame_max_compressed_len() { + // A known simple test case, expected len taken from lz4/lz4 repo + let len = lz4_frame_max_compressed_len(25, 4); + assert_eq!(len, 65544); + } + + #[test] + fn test_lz4_block_max_compressed_len() { + let mut error: *mut i8 = std::ptr::null_mut(); + 
let len = lz4_block_max_compressed_len(10, &mut error); + assert!(error.is_null()); + assert_eq!(len, 30); + } + + #[test] + fn test_snappy_raw_max_compressed_len() { + let len = snappy_raw_max_compressed_len(10); + assert_eq!(len, 43); + } + + #[test] + fn test_snappy_raw_decompressed_len() { + let uncompressed = b"bytes"; + let mut compressed = vec![0; snappy_raw_max_compressed_len(uncompressed.len())]; + let nbytes_written = snappy::raw::compress(uncompressed, &mut compressed).unwrap(); + + let mut error: *mut i8 = std::ptr::null_mut(); + let len = snappy_raw_decompressed_len(compressed.as_ptr(), nbytes_written, &mut error); + + assert!(error.is_null()); + assert_eq!(len as usize, uncompressed.len()); + } + + #[test] + fn test_snappy_roundtrip() { + let mut expected = vec![]; + snappy::compress(Cursor::new(DATA), &mut expected).unwrap(); + roundtrip(Codec::Snappy, &expected, 0); + } + #[test] + fn test_snappy_raw_roundtrip() { + let expected = snappy::raw::compress_vec(DATA).unwrap(); + roundtrip(Codec::SnappyRaw, &expected, 0); + } + #[test] + fn test_lz4_roundtrip() { + let mut expected = Cursor::new(vec![]); + lz4::compress(Cursor::new(DATA), &mut expected, Some(6)).unwrap(); + let expected = expected.into_inner(); + roundtrip(Codec::Lz4, &expected, 6); + } + #[test] + fn test_lz4_block_roundtrip() { + let expected = lz4::block::compress_vec(DATA, Some(6), Some(1), Some(true)).unwrap(); + roundtrip(Codec::Lz4Block, &expected, 6); + } + #[test] + fn test_bzip2_roundtrip() { + let mut expected = Cursor::new(vec![]); + bzip2::compress(Cursor::new(DATA), &mut expected, Some(6)).unwrap(); + let expected = expected.into_inner(); + roundtrip(Codec::Bzip2, &expected, 6); + } + #[test] + fn test_brotli_roundtrip() { + let mut expected = Cursor::new(vec![]); + brotli::compress(Cursor::new(DATA), &mut expected, Some(6)).unwrap(); + let expected = expected.into_inner(); + roundtrip(Codec::Brotli, &expected, 6); + } + #[test] + fn test_zstd_roundtrip() { + let mut 
expected = Cursor::new(vec![]); + zstd::compress(Cursor::new(DATA), &mut expected, Some(6)).unwrap(); + let expected = expected.into_inner(); + roundtrip(Codec::Zstd, &expected, 6); + } + + fn roundtrip(codec: Codec, expected: &[u8], level: i32) { + let mut nbytes_read = 0; + let mut nbytes_written = 0; + let mut error = std::ptr::null_mut(); + let buffer = compress( + codec, + level, + DATA.as_ptr(), + DATA.len(), + &mut nbytes_read, + &mut nbytes_written, + &mut error, + ); + if !error.is_null() { + let error = unsafe { CString::from_raw(error) }; + panic!("Failed: {}", error.to_str().unwrap()); + } + assert_eq!(nbytes_read, DATA.len()); + assert_eq!(nbytes_written, buffer.len); + assert!(buffer.owned); + + // retrieve compressed data and compare to actual rust impl + let compressed = unsafe { Vec::from_raw_parts(buffer.data as *mut u8, buffer.len, buffer.len) }; + assert_eq!(&compressed, expected); + + // And decompress + nbytes_read = 0; + nbytes_written = 0; + + let buffer = decompress( + codec, + compressed.as_ptr(), + compressed.len(), + &mut nbytes_read, + &mut nbytes_written, + &mut error, + ); + if !error.is_null() { + let error = unsafe { CString::from_raw(error) }; + panic!("Failed: {}", error.to_str().unwrap()); + } + assert_eq!(nbytes_read, compressed.len()); + assert_eq!(nbytes_written, buffer.len); + assert_eq!(nbytes_written, DATA.len()); + assert!(buffer.owned); + let decompressed = unsafe { Vec::from_raw_parts(buffer.data as *mut u8, buffer.len, buffer.len) }; + assert_eq!(DATA.as_slice(), &decompressed); + } +} diff --git a/libcramjam/src/deflate.rs b/libcramjam/src/deflate.rs index d2457b36..c946b99f 100644 --- a/libcramjam/src/deflate.rs +++ b/libcramjam/src/deflate.rs @@ -2,11 +2,18 @@ pub use flate2; use flate2::read::{DeflateDecoder, DeflateEncoder}; use flate2::Compression; +use libdeflater; use std::io::prelude::*; use std::io::Error; const DEFAULT_COMPRESSION_LEVEL: u32 = 6; +pub fn compress_bound(input_len: usize, level: Option) -> 
usize { + let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL as _); + let mut c = libdeflater::Compressor::new(libdeflater::CompressionLvl::new(level).unwrap()); + c.deflate_compress_bound(input_len) +} + /// Decompress gzip data #[inline(always)] pub fn decompress(input: R, output: &mut W) -> Result { diff --git a/libcramjam/src/gzip.rs b/libcramjam/src/gzip.rs index ecd28f16..588c2546 100644 --- a/libcramjam/src/gzip.rs +++ b/libcramjam/src/gzip.rs @@ -7,6 +7,15 @@ use std::io::{Cursor, Error}; const DEFAULT_COMPRESSION_LEVEL: u32 = 6; +pub fn compress_bound(input_len: usize, level: Option) -> Result { + let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL as _); + let mut c = libdeflater::Compressor::new( + libdeflater::CompressionLvl::new(level) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, format!("{:?}", e)))?, + ); + Ok(c.gzip_compress_bound(input_len)) +} + /// Decompress gzip data #[inline(always)] pub fn decompress(input: R, output: &mut W) -> Result { diff --git a/libcramjam/src/lib.rs b/libcramjam/src/lib.rs index 8dcd5039..d036aa95 100644 --- a/libcramjam/src/lib.rs +++ b/libcramjam/src/lib.rs @@ -6,6 +6,9 @@ pub mod lz4; pub mod snappy; pub mod zstd; +#[cfg(feature = "capi")] +mod capi; + #[cfg(test)] mod tests { diff --git a/libcramjam/src/lz4.rs b/libcramjam/src/lz4.rs index 55bab3c3..bd77fd4a 100644 --- a/libcramjam/src/lz4.rs +++ b/libcramjam/src/lz4.rs @@ -1,31 +1,53 @@ //! 
lz4 de/compression interface pub use lz4; -use lz4::{Decoder, EncoderBuilder}; -use std::io::{BufReader, Error, Read, Seek, SeekFrom, Write}; +use std::io::{BufReader, Cursor, Error, Read, Write}; -const DEFAULT_COMPRESSION_LEVEL: u32 = 4; +pub const DEFAULT_COMPRESSION_LEVEL: u32 = 4; +pub const LZ4_ACCELERATION_MAX: u32 = 65537; + +#[inline(always)] +pub fn make_write_compressor(output: W, level: Option) -> Result, Error> { + let comp = lz4::EncoderBuilder::new() + .level(level.unwrap_or(DEFAULT_COMPRESSION_LEVEL)) + .auto_flush(true) + .favor_dec_speed(true) + .build(output)?; + Ok(comp) +} /// Decompress lz4 data #[inline(always)] pub fn decompress(input: R, output: &mut W) -> Result { - let mut decoder = Decoder::new(input)?; + let mut decoder = lz4::Decoder::new(input)?; let n_bytes = std::io::copy(&mut decoder, output)?; decoder.finish().1?; Ok(n_bytes as usize) } +#[inline(always)] +pub fn compress_bound(input_len: usize, level: Option) -> usize { + let mut prefs: std::mem::MaybeUninit = std::mem::MaybeUninit::zeroed(); + let prefs_ptr = prefs.as_mut_ptr(); + unsafe { + std::ptr::write( + std::ptr::addr_of_mut!((*prefs_ptr).compression_level), + level.unwrap_or(DEFAULT_COMPRESSION_LEVEL), + ) + }; + + let n = unsafe { lz4::liblz4::LZ4F_compressBound(input_len, prefs.as_ptr()) }; + unsafe { std::ptr::drop_in_place(std::ptr::addr_of_mut!((*prefs_ptr).compression_level)) }; + n +} + /// Compress lz4 data #[inline(always)] -pub fn compress( - input: R, - output: &mut W, - level: Option, -) -> Result { - let start_pos = output.seek(SeekFrom::Current(0))?; - let mut encoder = EncoderBuilder::new() - .auto_flush(true) - .level(level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL)) - .build(output)?; +pub fn compress(input: R, output: &mut W, level: Option) -> Result { + // Can add an additional constraint to `Seek` for output but that is not great for API + // so very unfortunately, we have an intermediate buffer to get bytes written to output + // as lz4::Encoder is 
Write only + let out_buffer = vec![]; + let mut encoder = make_write_compressor(out_buffer, level)?; // this returns, bytes read from uncompressed, input; we want bytes written // but lz4 only implements Read for Encoder @@ -33,6 +55,143 @@ pub fn compress( std::io::copy(&mut buf, &mut encoder)?; let (w, r) = encoder.finish(); r?; - let ending_pos = w.seek(SeekFrom::Current(0))?; - Ok((ending_pos - start_pos) as usize) + + // Now copy bytes from temp output buffer to actual output, returning number of bytes written to 'output'. + let nbytes = std::io::copy(&mut Cursor::new(w), output)?; + Ok(nbytes as _) +} + +pub mod block { + use lz4::block::CompressionMode; + use std::io::Error; + + const PREPEND_SIZE: bool = true; + + #[inline(always)] + pub fn compress_bound(input_len: usize, prepend_size: Option) -> usize { + match lz4::block::compress_bound(input_len) { + Ok(len) => { + if prepend_size.unwrap_or(true) { + len + 4 + } else { + len + } + } + Err(_) => 0, + } + } + + /// Decompress into a `Vec`. The input must have been compressed with the uncompressed size prepended; + /// an error is returned if the input is too short to hold that size or fails to decode. + #[inline(always)] + pub fn decompress_vec(input: &[u8]) -> Result, Error> { + if input.len() < 4 { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Input not long enough", + )); + } + let bytes: [u8; 4] = input[..4].try_into().unwrap(); + let len = u32::from_le_bytes(bytes); + let mut buf = vec![0u8; len as usize]; + let nbytes = decompress_into(&input[4..], &mut buf, Some(false))?; + buf.truncate(nbytes); + Ok(buf) + } + + /// NOTE: input is expected to **not** have the size prepended. Calling decompress_into is + /// saying you already know the output buffer min size. `output` can be larger, but it cannot + /// be smaller than what's required.
+ #[inline(always)] + pub fn decompress_into(input: &[u8], output: &mut [u8], size_prepended: Option) -> Result { + let uncompressed_size = if size_prepended.is_some_and(|v| v) { + None // decompress_to_buffer will read from prepended size + } else { + Some(output.len() as _) + }; + let nbytes = lz4::block::decompress_to_buffer(input, uncompressed_size, output)?; + Ok(nbytes) + } + + #[inline(always)] + pub fn compress_vec( + input: &[u8], + level: Option, + acceleration: Option, + prepend_size: Option, + ) -> Result, Error> { + let len = compress_bound(input.len(), prepend_size); + let mut buffer = vec![0u8; len]; + let nbytes = compress_into(input, &mut buffer, level, acceleration, prepend_size)?; + buffer.truncate(nbytes); + Ok(buffer) + } + + #[inline(always)] + pub fn compress_into( + input: &[u8], + output: &mut [u8], + level: Option, + acceleration: Option, + prepend_size: Option, + ) -> Result { + let prepend_size = prepend_size.unwrap_or(PREPEND_SIZE); + let mode = compression_mode(None, level.map(|v| v as _), acceleration)?; + let nbytes = lz4::block::compress_to_buffer(input, Some(mode), prepend_size, output)?; + Ok(nbytes) + } + + #[inline] + fn compression_mode( + mode: Option<&str>, + compression: Option, + acceleration: Option, + ) -> Result { + let m = match mode { + Some(m) => match m { + "default" => CompressionMode::DEFAULT, + "fast" => CompressionMode::FAST(acceleration.unwrap_or(1)), + "high_compression" => CompressionMode::HIGHCOMPRESSION(compression.unwrap_or(9)), + _ => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Invalid compression string, needed one of 'default', 'fast', or 'high_compression'", + )) + } + }, + None => CompressionMode::DEFAULT, + }; + Ok(m) + } + + #[cfg(test)] + mod tests { + + use super::{compress_vec, decompress_into, decompress_vec}; + + const DATA: &[u8; 14] = b"howdy neighbor"; + + #[test] + fn round_trip_store_size() { + let compressed = compress_vec(DATA, None, None, 
Some(true)).unwrap(); + let decompressed = decompress_vec(&compressed).unwrap(); + assert_eq!(&decompressed, DATA); + } + #[test] + fn round_trip_no_store_size() { + let compressed = compress_vec(DATA, None, None, Some(false)).unwrap(); + + // decompress_vec depends on the prepended size, so we can't use that. + assert!(decompress_vec(&compressed).is_err()); + + let mut decompressed = vec![0u8; DATA.len()]; + decompress_into(&compressed, &mut decompressed, Some(false)).unwrap(); + assert_eq!(&decompressed, DATA); + + // decompress_into will allow a larger output buffer than what's needed + let mut decompressed = vec![0u8; DATA.len() + 5_000]; + let n = decompress_into(&compressed, &mut decompressed, Some(false)).unwrap(); + assert_eq!(&decompressed[..n], DATA); + } + } } diff --git a/libcramjam/src/zstd.rs b/libcramjam/src/zstd.rs index 44b17b12..bece51c4 100644 --- a/libcramjam/src/zstd.rs +++ b/libcramjam/src/zstd.rs @@ -1,9 +1,15 @@ //! zstd de/compression interface use std::io::{Error, Read, Write}; pub use zstd; +pub use zstd_safe; const DEFAULT_COMPRESSION_LEVEL: i32 = 0; +/// Get the max compressed length for a single pass +pub fn compress_bound(len: usize) -> usize { + zstd_safe::compress_bound(len) +} + /// Decompress gzip data #[inline(always)] pub fn decompress(input: R, output: &mut W) -> Result {