From 6284e0fb2fa784b2c0994c2c6168af5e263e4f2a Mon Sep 17 00:00:00 2001 From: "Erwin J. van Eijk" <235739+erwinvaneijk@users.noreply.github.com> Date: Sun, 30 Jul 2023 16:38:34 +0200 Subject: [PATCH 1/7] Move to a priority queue * By not using our own heap, we have less code to maintain. --- Cargo.lock | 327 +++++++++++++++++++++++-------------- Cargo.toml | 24 ++- src/fbhash/chunker.rs | 2 +- src/fbhash/heap.rs | 185 --------------------- src/fbhash/mod.rs | 4 +- src/fbhash/similarities.rs | 21 ++- src/fbhash/utils.rs | 8 +- 7 files changed, 231 insertions(+), 340 deletions(-) delete mode 100644 src/fbhash/heap.rs diff --git a/Cargo.lock b/Cargo.lock index 7a0ac8a..17b0e5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15,22 +15,31 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "0.7.18" +version = "0.7.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" dependencies = [ "memchr", ] +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "assert_cmd" -version = "1.0.8" +version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c98233c6673d8601ab23e77eb38f999c51100d46c5703b17288c57fddf3a1ffe" +checksum = "93ae1ddd39efd67689deb1979d80bad3bf7f2b09c6e6117c8d1f2443b5e2f83e" dependencies = [ "bstr", "doc-comment", - "predicates 2.1.1", + "predicates", "predicates-core", "predicates-tree", "wait-timeout", @@ -79,6 +88,12 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "bumpalo" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d" + [[package]] name = "cc" version = "1.0.73" @@ -93,15 +108,17 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.19" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1" dependencies = [ - "libc", + "iana-time-zone", + "js-sys", "num-integer", "num-traits", "serde", "time", + "wasm-bindgen", "winapi", ] @@ -132,37 +149,29 @@ dependencies = [ [[package]] name = "console" -version = "0.14.1" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3993e6445baa160675931ec041a5e03ca84b9c6e32a056150d3aa2bdda0a1f45" +checksum = "89eab4d20ce20cea182308bca13088fecea9c05f6776cf287205d41a0ed3c847" dependencies = [ "encode_unicode", - "lazy_static", "libc", - "regex", + "once_cell", "terminal_size 0.1.17", "unicode-width", "winapi", ] [[package]] -name = "console" -version = "0.15.0" +name = "core-foundation-sys" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31" -dependencies = [ - "encode_unicode", - "libc", - "once_cell", - "terminal_size 0.1.17", - "winapi", -] +checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" [[package]] name = "crossbeam-channel" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c02a4d71819009c192cf4872265391563fd6a84c81ff2c0f2a7026ca4c1d85c" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" dependencies = [ "cfg-if", "crossbeam-utils", @@ -170,9 +179,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -181,9 +190,9 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d" +checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" dependencies = [ "autocfg", "cfg-if", @@ -195,9 +204,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.10" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83" +checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" dependencies = [ "cfg-if", "once_cell", @@ -205,9 +214,9 @@ dependencies = [ [[package]] name = "ctor" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f877be4f7c9f246b183111634f75baa039715e3f46ce860677d3b19a69fb229c" +checksum = "cdffe87e1d521a10f9696f833fe502293ea446d7f256c06128293a4119bdf4cb" dependencies = [ "quote", "syn", @@ -219,12 +228,6 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" -[[package]] -name = "difference" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" - [[package]] name = "difflib" version = "0.4.0" @@ -239,9 +242,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" +checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" [[package]] name = "encode_unicode" @@ -251,9 +254,9 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] name = "env_logger" -version = "0.8.4" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" +checksum = "c90bf5f19754d10198ccb95b70664fc925bd1fc090a0fd9a6ebc54acc8cd6272" dependencies = [ "atty", "humantime", @@ -285,9 +288,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" +checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" dependencies = [ "instant", ] @@ -300,15 +303,17 @@ dependencies = [ "bincode", "chrono", "clap", - "console 0.14.1", + "console", "env_logger", "float-cmp", "hash_hasher", - "hashbrown 0.11.2", + "hashbrown", "indicatif", "itertools", - "predicates 1.0.8", + "ordered-float", + "predicates", "pretty_assertions", + "priority-queue", "rayon", "serde", "serde_json", @@ -320,9 +325,9 @@ dependencies = [ [[package]] name = "float-cmp" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1267f4ac4f343772758f7b1bdcbe767c218bbab93bb432acbf5162bbf85a6c4" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" dependencies = [ "num-traits", ] @@ -346,21 +351,15 @@ checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" [[package]] name = "hashbrown" -version = "0.11.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ "ahash", "rayon", "serde", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hermit-abi" version = "0.1.19" @@ -376,6 +375,19 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "iana-time-zone" +version = "0.1.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bbaead50122b06e9a973ac20bc7445074d99ad9a0a0654934876908a9cec82c" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "js-sys", + "wasm-bindgen", + "winapi", +] + [[package]] name = "indexmap" version = "1.9.1" @@ -383,20 +395,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" dependencies = [ "autocfg", - "hashbrown 0.12.3", + "hashbrown", ] [[package]] name = "indicatif" -version = "0.16.2" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d207dc617c7a380ab07ff572a6e52fa202a2a8f355860ac9c38e23f8196be1b" +checksum = "bfddc9561e8baf264e0e45e197fd7696320026eb10a8180340debc27b18f535b" dependencies = [ - "console 0.15.0", - "lazy_static", + "console", "number_prefix", "rayon", - "regex", + "unicode-width", ] [[package]] @@ -416,18 +427,27 @@ checksum = "1ea37f355c05dde75b84bba2d767906ad522e97cd9e2eef2be7a4ab7fb442c06" [[package]] name = "itertools" -version = "0.10.3" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] [[package]] name = "itoa" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" +checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" + +[[package]] +name = "js-sys" +version = "0.3.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" +dependencies = [ + "wasm-bindgen", +] [[package]] name = "lazy_static" @@ -437,15 +457,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.126" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" - -[[package]] -name = "linked-hash-map" -version = "0.5.6" +version = "0.2.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" +checksum = "c0f80d65747a3e43d1596c7c5492d95d5edddaabd45a7fcdb02b95f644164966" [[package]] name = "linux-raw-sys" @@ -520,9 +534,18 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "once_cell" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" +checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" + +[[package]] +name = "ordered-float" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98ffdb14730ed2ef599c65810c15b000896e21e8776b512de0db0c3d7335cc2a" +dependencies = [ + "num-traits", +] [[package]] name = "os_str_bytes" @@ -539,19 +562,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "predicates" -version = "1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49cfaf7fdaa3bfacc6fa3e7054e65148878354a5cfddcf661df4c851f8021df" -dependencies = [ - "difference", - "float-cmp", - "normalize-line-endings", - "predicates-core", - "regex", -] - [[package]] name = "predicates" version = "2.1.1" @@ -559,8 +569,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5aab5be6e4732b473071984b3164dbbfb7a3674d30ea5ff44410b6bcd960c3c" dependencies = [ "difflib", + "float-cmp", "itertools", + "normalize-line-endings", "predicates-core", + "regex", ] [[package]] @@ -591,20 +604,30 @@ dependencies = [ "yansi", ] +[[package]] +name = "priority-queue" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "815082d99af3acc75a3e67efd2a07f72e67b4e81b4344eb8ca34c6ebf3dfa9c5" +dependencies = [ + "autocfg", + "indexmap", +] + [[package]] name = "proc-macro2" -version = "1.0.40" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" +checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.20" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" +checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" dependencies = [ "proc-macro2", ] @@ -635,9 +658,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.2.13" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ "bitflags", ] @@ -676,9 +699,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.35.9" +version = "0.35.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72c825b8aa8010eb9ee99b75f05e10180b9278d161583034d7574c9d617aeada" +checksum = "af895b90e5c071badc3136fc10ff0bcfc98747eadbaf43ed8f214e07ba8f8477" dependencies = [ "bitflags", "errno", @@ -690,9 +713,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" +checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" [[package]] name = "same-file" @@ -711,18 +734,18 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "serde" -version = "1.0.139" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0171ebb889e45aa68b44aee0859b3eede84c6f5f5c228e6f140c0b2a0a46cad6" +checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.139" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc1d3230c1de7932af58ad8ffbe1d784bd55efd5a9d84ac24f69c72d83543dfb" +checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c" dependencies = [ "proc-macro2", "quote", @@ -731,9 +754,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.82" +version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82c2c1fdcd807d1098552c5b9a36e425e42e9fbd7c6a37a8425f390f781f7fa7" +checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44" dependencies = [ "itoa", "ryu", @@ -742,23 +765,24 @@ dependencies = [ [[package]] name = "serde_test" -version = "1.0.139" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc933653ac6800dd970d54829f0646dcfd078bcaae7fbd19c6e58a584564a5de" +checksum = "9c17d2112159132660b4c5399e274f676fb75a2f8d70b7468f18f045b71138ed" dependencies = [ "serde", ] [[package]] name = "serde_yaml" -version = "0.8.26" +version = "0.9.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578a7433b776b56a35785ed5ce9a7e777ac0598aac5a6dd1b4b18a307c7fc71b" +checksum = "8613d593412a0deb7bbd8de9d908efff5a0cb9ccd8f62c641e7b2ed2f57291d1" dependencies = [ "indexmap", + "itoa", "ryu", "serde", - "yaml-rust", + "unsafe-libyaml", ] [[package]] @@ -769,9 +793,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.98" +version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" +checksum = "52205623b1b0f064a4e71182c3b18ae902267282930c6d5462c91b859668426e" dependencies = [ "proc-macro2", "quote", @@ -849,15 +873,21 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.2" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7" +checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" [[package]] name = "unicode-width" -version = "0.1.9" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" +checksum = "c1e5fa573d8ac5f1a856f8d7be41d390ee973daf97c806b2c1a465e4e1406e68" [[package]] name = "version_check" @@ -897,6 +927,60 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasm-bindgen" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" + [[package]] name = "winapi" version = "0.3.9" @@ -971,15 +1055,6 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" -[[package]] -name = "yaml-rust" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" -dependencies = [ - "linked-hash-map", -] - [[package]] name = "yansi" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index da8048b..b2b8af0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,11 +7,7 @@ homepage = "https://github.com/erwinvaneijk/fbhash" repository = "https://github.com/erwinvaneijk/fbhash" description = "A Similarity Hashing Scheme for Digital Forensics" readme = "README.md" -keywords = [ - "filesystem", - "tool", - "forensics", -] +keywords = ["filesystem", "tool", "forensics"] license = "MIT" categories = ["command-line-utilities", "filesystem", "science"] @@ -31,15 +27,17 @@ codegen-units = 1 panic = "abort" [dependencies] -env_logger = "0.8.3" +env_logger = "0.9.1" itertools = "0.10.0" walkdir = "2" -serde_yaml = "0.8.17" +serde_yaml = "0.9.13" serde_json = "1.0" bincode = "~1.3.3" hash_hasher = "2.0" -console = "0.14.1" +console = "0.15.1" rayon = "1.5.0" +priority-queue = "1.2.3" +ordered-float = "3.1.0" [dependencies.chrono] version = "0.4.19" @@ -54,17 +52,17 @@ version = "1.0" features = ["derive"] [dependencies.indicatif] -version = "0.16.0" +version = "0.17.1" features = ["rayon"] [dependencies.hashbrown] -version = "0.11.2" +version = "0.12.3" features = ["serde", "rayon"] [dev-dependencies] pretty_assertions = "1.3.0" -float-cmp = "0.8.0" +float-cmp = "0.9.0" serde_test = "1.0" tempfile = "3" -assert_cmd = "1.0.3" -predicates = "1" +assert_cmd = "2.0.4" +predicates = "2.1.1" diff --git a/src/fbhash/chunker.rs b/src/fbhash/chunker.rs index 6c2ddc7..e49cc93 100644 --- a/src/fbhash/chunker.rs +++ b/src/fbhash/chunker.rs @@ -129,7 +129,7 @@ impl Iterator for ChunkIterator { } } } - Some(_) => { + Some(_) => { let mut b: Vec = vec![1]; match self.file.read(&mut b) { Ok(0) => None, diff --git a/src/fbhash/heap.rs b/src/fbhash/heap.rs deleted file mode 100644 index 3a97e9c..0000000 --- a/src/fbhash/heap.rs +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright 2018, Abhishek N V -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -pub struct Heap<'a, T> { - data: Vec>, - capacity: usize, -} - -impl<'a, T> Heap<'a, T> { - pub fn new(capacity: usize) -> Self { - Self { - data: vec![None], - capacity, - } - } - - pub fn len(&self) -> usize { - self.data.len() - 1 - } - - pub fn insert(&mut self, f: f64, item: &'a T) { - if self.len() == 0 { - self.data.push(Some((f, item))); - return; - } else if self.len() < self.capacity { - self.data.push(Some((f, item))); - self.heapify(); - return; - } - if let Some(m) = self.get_max() { - if m > f { - self.extract_max(); - self.data.push(Some((f, item))); - self.heapify(); - } - } - } - - pub fn get_elements(&self) -> Vec<(f64, &T)> { - // let mut sorted = self.data[1..].to_vec().clone(); - let mut sorted = Vec::new(); - for i in 1..self.len() + 1 { - sorted.push(self.data[i].unwrap()) - } - sorted.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); - sorted - } - - pub fn get_max(&self) -> Option { - if self.len() == 0 { - None - } else { - Some(self.data[1].unwrap().0) - } - } - - fn at_idx(&self, idx: usize) -> f64 { - self.data[idx].unwrap().0 - } - - fn heapify(&mut self) { - let parent = |x: usize| -> usize { x / 2 }; - let mut l = self.data.len() - 1; - let mut p = parent(l); - while p > 0 && self.at_idx(p) < self.at_idx(l) { - self.data.swap(l, p); - l = p; - p = parent(l); - } - } - fn extract_max(&mut self) -> Option { - let m = self.get_max(); - if self.data.len() <= 2 { - self.data.pop(); - return m; - } - - // send the last element to the top - if let Some(x) = self.data.pop() { - self.data[1] = x; - } - // now rebalance - let mut idx = 1; - let mut child = idx * 2; - while (child < self.len() && self.at_idx(idx) < self.at_idx(child)) - || (child + 1 < self.len() && self.at_idx(idx) < self.at_idx(child + 1)) - { - if (child + 1 < self.len()) && (self.at_idx(child + 1) > self.at_idx(child)) { - child += 1; - } - self.data.swap(idx, child); - idx = child; - child = idx * 2; - } - m - } -} - -#[cfg(test)] -mod tests { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn test_insert() { - let mut h = Heap::new(10); - for i in 0..6 { - h.insert(i as f64, &0); - } - - assert_eq!(h.len(), 6); - } - - #[test] - fn test_get_elements() { - let mut h = Heap::new(10); - h.insert(7.8, &0); - h.insert(98.78, &0); - h.insert(0.0, &0); - h.insert(1.0, &0); - - assert_eq!( - h.get_elements(), - vec![(0.0, &0), (1.0, &0), (7.8, &0), (98.78, &0)] - ); - } - - #[test] - fn test_extract_max() { - let mut h = Heap::new(10); - - h.insert(42.0, &0); - assert_eq!(h.len(), 1); - match h.extract_max() { - Some(x) => assert!(approx_eq!(f64, x, 42.0, epsilon = 0.0)), - None => panic!(), - } - assert_eq!(h.len(), 0); - - let v = vec![69.42, 34.26, 72.53, 14.69, 29.24, 89.00, 1.72, 94.44, 30.46]; - for i in v { - h.insert(i, &0); - } - - assert_eq!(h.len(), 9); - match h.extract_max() { - Some(x) => assert!(approx_eq!(f64, x, 94.44, epsilon = 0.0_f64)), - None => panic!(), - } - assert_eq!(h.len(), 8); - } - - #[test] - fn test_get_max() { - let mut h = Heap::new(10); - let v: Vec = vec![ - 69.42, 34.26, 72.53, 14.69, 29.24, 89.00, 1.72, 94.44, 30.46, 81.18, - ]; - for i in v { - h.insert(i, &0); - } - - match h.get_max() { - Some(x) => assert!(approx_eq!(f64, x, 94.44, ulps = 2)), - None => panic!(), - } - } -} diff --git a/src/fbhash/mod.rs b/src/fbhash/mod.rs index 40cd182..156fc19 100644 --- a/src/fbhash/mod.rs +++ b/src/fbhash/mod.rs @@ -1,11 +1,9 @@ pub mod chunker; -pub mod heap; - pub mod similarities; pub mod index; pub mod query; -pub mod utils; \ No newline at end of file +pub mod utils; diff --git a/src/fbhash/similarities.rs b/src/fbhash/similarities.rs index 997d113..9d6901c 100644 --- a/src/fbhash/similarities.rs +++ b/src/fbhash/similarities.rs @@ -18,16 +18,16 @@ // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +use crate::fbhash::chunker::ChunkIterator; use hashbrown::HashMap; use indicatif::ProgressBar; +use ordered_float::OrderedFloat; +use priority_queue::PriorityQueue; use serde::{Deserialize, Serialize}; use std::collections::{BTreeMap, BTreeSet}; use std::fs::File; use std::io; -use crate::fbhash::chunker::ChunkIterator; -use crate::fbhash::heap::Heap; - pub fn file_to_chunks(file: File) -> Vec { let chunk_iterator = ChunkIterator::new(file); let chunks: Vec = chunk_iterator.into_iter().map(|e| e.digest).collect(); @@ -260,22 +260,25 @@ pub fn ranked_search( k: usize, progress: &ProgressBar, ) -> Vec<(f64, Document)> { - let mut queue = Heap::new(k); + let mut queue = PriorityQueue::with_capacity(k); documents .iter() .map(|other_doc| { (other_doc, { progress.inc(1); - cosine_distance(&other_doc.digest, doc) + OrderedFloat(cosine_distance(&other_doc.digest, doc)) }) }) .for_each(|(d, score)| { - queue.insert(score, d); + queue.push(d, score); }); let mut result = Vec::new(); - for (similarity, doc) in queue.get_elements() { - result.push((similarity, doc.clone())); - } + queue + .into_sorted_iter() + .into_iter() + .for_each(|(doc, similarity)| { + result.push((similarity.into_inner(), doc.clone())); + }); result } diff --git a/src/fbhash/utils.rs b/src/fbhash/utils.rs index 915eccf..22dc07e 100644 --- a/src/fbhash/utils.rs +++ b/src/fbhash/utils.rs @@ -19,6 +19,7 @@ // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. use indicatif::{ProgressBar, ProgressStyle}; +use std::time::Duration; pub enum OutputFormat { Json, @@ -31,9 +32,10 @@ pub fn create_progress_bar(size: u64) -> ProgressBar { } else { ProgressBar::hidden() }; - let style = ProgressStyle::default_bar() - .template("[{elapsed_precise} {eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}"); + //let style = ProgressStyle::default_bar() + // .template("[{elapsed_precise} {eta}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}"); + let style = ProgressStyle::default_bar(); pb.set_style(style); - pb.set_draw_delta(size / 100); + pb.enable_steady_tick(Duration::from_secs(1)); pb } From 953d2d65cacaeb3ab33551902d20daa7d99b9af5 Mon Sep 17 00:00:00 2001 From: "Erwin J. van Eijk" <235739+erwinvaneijk@users.noreply.github.com> Date: Sun, 30 Jul 2023 17:17:14 +0200 Subject: [PATCH 2/7] Update to new version --- Cargo.lock | 616 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 379 insertions(+), 237 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 17b0e5b..f0e2325 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15,13 +15,19 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "0.7.19" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" +checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" dependencies = [ "memchr", ] +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -31,15 +37,22 @@ dependencies = [ "libc", ] +[[package]] +name = "anstyle" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" + [[package]] name = "assert_cmd" -version = "2.0.4" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93ae1ddd39efd67689deb1979d80bad3bf7f2b09c6e6117c8d1f2443b5e2f83e" +checksum = "88903cb14723e4d4003335bb7f8a14f27691649105346a0f0957466c096adfe6" dependencies = [ + "anstyle", "bstr", "doc-comment", - "predicates", + "predicates 3.0.3", "predicates-core", "predicates-tree", "wait-timeout", @@ -51,7 +64,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -77,28 +90,34 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" + [[package]] name = "bstr" -version = "0.2.17" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05" dependencies = [ - "lazy_static", "memchr", "regex-automata", + "serde", ] [[package]] name = "bumpalo" -version = "3.11.0" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" [[package]] name = "cc" -version = "1.0.73" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" [[package]] name = "cfg-if" @@ -108,13 +127,13 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.22" +version = "0.4.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1" +checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5" dependencies = [ + "android-tzdata", "iana-time-zone", "js-sys", - "num-integer", "num-traits", "serde", "time", @@ -124,17 +143,17 @@ dependencies = [ [[package]] name = "clap" -version = "3.2.22" +version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86447ad904c7fb335a790c9d7fe3d0d971dc523b8ccd1561a520de9a85302750" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ "atty", - "bitflags", + "bitflags 1.3.2", "clap_lex", - "indexmap", + "indexmap 1.9.3", "strsim", "termcolor", - "terminal_size 0.2.1", + "terminal_size", "textwrap", ] @@ -149,29 +168,28 @@ dependencies = [ [[package]] name = "console" -version = "0.15.1" +version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89eab4d20ce20cea182308bca13088fecea9c05f6776cf287205d41a0ed3c847" +checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" dependencies = [ "encode_unicode", + "lazy_static", "libc", - "once_cell", - "terminal_size 0.1.17", "unicode-width", - "winapi", + "windows-sys 0.45.0", ] [[package]] name = "core-foundation-sys" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "crossbeam-channel" -version = "0.5.6" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" dependencies = [ "cfg-if", "crossbeam-utils", @@ -179,9 +197,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -190,36 +208,24 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.10" +version = "0.9.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", "memoffset", - "once_cell", "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.11" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" +checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" dependencies = [ "cfg-if", - "once_cell", -] - -[[package]] -name = "ctor" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdffe87e1d521a10f9696f833fe502293ea446d7f256c06128293a4119bdf4cb" -dependencies = [ - "quote", - "syn", ] [[package]] @@ -242,9 +248,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "encode_unicode" @@ -254,9 +260,9 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] name = "env_logger" -version = "0.9.1" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c90bf5f19754d10198ccb95b70664fc925bd1fc090a0fd9a6ebc54acc8cd6272" +checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" dependencies = [ "atty", "humantime", @@ -265,15 +271,21 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" -version = "0.2.8" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" dependencies = [ "errno-dragonfly", "libc", - "winapi", + "windows-sys 0.48.0", ] [[package]] @@ -288,12 +300,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "1.8.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" -dependencies = [ - "instant", -] +checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" [[package]] name = "fbhash" @@ -307,11 +316,11 @@ dependencies = [ "env_logger", "float-cmp", "hash_hasher", - "hashbrown", + "hashbrown 0.12.3", "indicatif", "itertools", "ordered-float", - "predicates", + "predicates 2.1.5", "pretty_assertions", "priority-queue", "rayon", @@ -334,9 +343,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.7" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", "libc", @@ -360,6 +369,12 @@ dependencies = [ "serde", ] +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -369,6 +384,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" + [[package]] name = "humantime" version = "2.1.0" @@ -377,35 +398,57 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "iana-time-zone" -version = "0.1.49" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bbaead50122b06e9a973ac20bc7445074d99ad9a0a0654934876908a9cec82c" +checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" dependencies = [ "android_system_properties", "core-foundation-sys", + "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "winapi", + "windows", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", ] [[package]] name = "indexmap" -version = "1.9.1" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown 0.14.0", ] [[package]] name = "indicatif" -version = "0.17.1" +version = "0.17.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfddc9561e8baf264e0e45e197fd7696320026eb10a8180340debc27b18f535b" +checksum = "8ff8cc23a7393a397ed1d7f56e6365cba772aba9f9912ab968b03043c395d057" dependencies = [ "console", + "instant", "number_prefix", + "portable-atomic", "rayon", "unicode-width", ] @@ -421,9 +464,14 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "0.7.3" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ea37f355c05dde75b84bba2d767906ad522e97cd9e2eef2be7a4ab7fb442c06" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" +dependencies = [ + "hermit-abi 0.3.2", + "libc", + "windows-sys 0.48.0", +] [[package]] name = "itertools" @@ -436,15 +484,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.3" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "js-sys" -version = "0.3.60" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" dependencies = [ "wasm-bindgen", ] @@ -457,24 +505,27 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.133" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0f80d65747a3e43d1596c7c5492d95d5edddaabd45a7fcdb02b95f644164966" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "linux-raw-sys" -version = "0.0.46" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d2456c373231a208ad294c33dc5bff30051eafd954cd4caae83a712b12854d" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" + +[[package]] +name = "linux-raw-sys" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0" [[package]] name = "log" -version = "0.4.17" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" [[package]] name = "memchr" @@ -484,9 +535,9 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memoffset" -version = "0.6.5" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" dependencies = [ "autocfg", ] @@ -497,32 +548,22 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" -[[package]] -name = "num-integer" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" -dependencies = [ - "autocfg", - "num-traits", -] - [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", ] [[package]] name = "num_cpus" -version = "1.13.1" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.2", "libc", ] @@ -534,39 +575,36 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "once_cell" -version = "1.15.0" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "ordered-float" -version = "3.1.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98ffdb14730ed2ef599c65810c15b000896e21e8776b512de0db0c3d7335cc2a" +checksum = "2fc2dbde8f8a79f2102cc474ceb0ad68e3b80b85289ea62389b60e66777e4213" dependencies = [ "num-traits", ] [[package]] name = "os_str_bytes" -version = "6.3.0" +version = "6.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff" +checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac" [[package]] -name = "output_vt100" -version = "0.1.3" +name = "portable-atomic" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" -dependencies = [ - "winapi", -] +checksum = "f32154ba0af3a075eefa1eda8bb414ee928f62303a54ea85b8d6638ff1a6ee9e" [[package]] name = "predicates" -version = "2.1.1" +version = "2.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5aab5be6e4732b473071984b3164dbbfb7a3674d30ea5ff44410b6bcd960c3c" +checksum = "59230a63c37f3e18569bdb90e4a89cbf5bf8b06fea0b84e65ea10cc4df47addd" dependencies = [ "difflib", "float-cmp", @@ -576,17 +614,29 @@ dependencies = [ "regex", ] +[[package]] +name = "predicates" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9" +dependencies = [ + "anstyle", + "difflib", + "itertools", + "predicates-core", +] + [[package]] name = "predicates-core" -version = "1.0.3" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da1c2388b1513e1b605fcec39a95e0a9e8ef088f71443ef37099fa9ae6673fcb" +checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" [[package]] name = "predicates-tree" -version = "1.0.5" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d86de6de25020a36c6d3643a86d9a6a9f552107c0559c60ea03551b5e16c032" +checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf" dependencies = [ "predicates-core", "termtree", @@ -594,61 +644,57 @@ dependencies = [ [[package]] name = "pretty_assertions" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a25e9bcb20aa780fd0bb16b72403a9064d6b3f22f026946029acb941a50af755" +checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" dependencies = [ - "ctor", "diff", - "output_vt100", "yansi", ] [[package]] name = "priority-queue" -version = "1.2.3" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "815082d99af3acc75a3e67efd2a07f72e67b4e81b4344eb8ca34c6ebf3dfa9c5" +checksum = "fff39edfcaec0d64e8d0da38564fad195d2d51b680940295fcc307366e101e61" dependencies = [ "autocfg", - "indexmap", + "indexmap 1.9.3", ] [[package]] name = "proc-macro2" -version = "1.0.43" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" -version = "1.5.3" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" dependencies = [ - "autocfg", - "crossbeam-deque", "either", "rayon-core", ] [[package]] name = "rayon-core" -version = "1.9.3" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" dependencies = [ "crossbeam-channel", "crossbeam-deque", @@ -658,64 +704,74 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.2.16" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] name = "regex" -version = "1.6.0" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" dependencies = [ "aho-corasick", "memchr", + "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" -version = "0.1.10" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +checksum = "b7b6d6190b7594385f61bd3911cd1be99dfddcfc365a4160cc2ab5bff4aed294" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] [[package]] name = "regex-syntax" -version = "0.6.27" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" [[package]] -name = "remove_dir_all" -version = "0.5.3" +name = "rustix" +version = "0.37.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06" dependencies = [ - "winapi", + "bitflags 1.3.2", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys 0.3.8", + "windows-sys 0.48.0", ] [[package]] name = "rustix" -version = "0.35.10" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af895b90e5c071badc3136fc10ff0bcfc98747eadbaf43ed8f214e07ba8f8477" +checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5" dependencies = [ - "bitflags", + "bitflags 2.3.3", "errno", - "io-lifetimes", "libc", - "linux-raw-sys", - "windows-sys", + "linux-raw-sys 0.4.3", + "windows-sys 0.48.0", ] [[package]] name = "ryu" -version = "1.0.11" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" [[package]] name = "same-file" @@ -728,24 +784,24 @@ dependencies = [ [[package]] name = "scopeguard" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.145" +version = "1.0.178" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b" +checksum = "60363bdd39a7be0266a520dab25fdc9241d2f987b08a01e01f0ec6d06a981348" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.145" +version = "1.0.178" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c" +checksum = "f28482318d6641454cb273da158647922d1be6b5a2fcc6165cd89ebdd7ed576b" dependencies = [ "proc-macro2", "quote", @@ -754,9 +810,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.85" +version = "1.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44" +checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c" dependencies = [ "itoa", "ryu", @@ -765,20 +821,20 @@ dependencies = [ [[package]] name = "serde_test" -version = "1.0.145" +version = "1.0.176" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c17d2112159132660b4c5399e274f676fb75a2f8d70b7468f18f045b71138ed" +checksum = "5a2f49ace1498612d14f7e0b8245519584db8299541dfe31a06374a828d620ab" dependencies = [ "serde", ] [[package]] name = "serde_yaml" -version = "0.9.13" +version = "0.9.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8613d593412a0deb7bbd8de9d908efff5a0cb9ccd8f62c641e7b2ed2f57291d1" +checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574" dependencies = [ - "indexmap", + "indexmap 2.0.0", "itoa", "ryu", "serde", @@ -793,9 +849,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.100" +version = "2.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52205623b1b0f064a4e71182c3b18ae902267282930c6d5462c91b859668426e" +checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0" dependencies = [ "proc-macro2", "quote", @@ -804,67 +860,56 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.3.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +checksum = "5486094ee78b2e5038a6382ed7645bc084dc2ec433426ca4c3cb61e2007b8998" dependencies = [ "cfg-if", "fastrand", - "libc", "redox_syscall", - "remove_dir_all", - "winapi", + "rustix 0.38.4", + "windows-sys 0.48.0", ] [[package]] name = "termcolor" -version = "1.1.3" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" dependencies = [ "winapi-util", ] [[package]] name = "terminal_size" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "terminal_size" -version = "0.2.1" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8440c860cf79def6164e4a0a983bcc2305d82419177a0e0c71930d049e3ac5a1" +checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237" dependencies = [ - "rustix", - "windows-sys", + "rustix 0.37.23", + "windows-sys 0.48.0", ] [[package]] name = "termtree" -version = "0.2.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "507e9898683b6c43a9aa55b64259b721b52ba226e0f3779137e50ad114a4c90b" +checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "textwrap" -version = "0.15.1" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "949517c0cf1bf4ee812e2e07e08ab448e3ae0d23472aee8a06c985f0c8815b16" +checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" dependencies = [ - "terminal_size 0.2.1", + "terminal_size", ] [[package]] name = "time" -version = "0.1.44" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" dependencies = [ "libc", "wasi 0.10.0+wasi-snapshot-preview1", @@ -873,9 +918,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.4" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "unicode-width" @@ -885,9 +930,9 @@ checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" [[package]] name = "unsafe-libyaml" -version = "0.2.4" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1e5fa573d8ac5f1a856f8d7be41d390ee973daf97c806b2c1a465e4e1406e68" +checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa" [[package]] name = "version_check" @@ -906,12 +951,11 @@ dependencies = [ [[package]] name = "walkdir" -version = "2.3.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" dependencies = [ "same-file", - "winapi", "winapi-util", ] @@ -929,9 +973,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -939,9 +983,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" dependencies = [ "bumpalo", "log", @@ -954,9 +998,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -964,9 +1008,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", @@ -977,9 +1021,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "winapi" @@ -1012,48 +1056,146 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.1", +] + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + [[package]] name = "windows-sys" -version = "0.36.1" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.1", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", ] +[[package]] +name = "windows-targets" +version = "0.48.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" -version = "0.36.1" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" [[package]] name = "windows_i686_gnu" -version = "0.36.1" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" [[package]] name = "windows_i686_msvc" -version = "0.36.1" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" [[package]] name = "windows_x86_64_gnu" -version = "0.36.1" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" [[package]] name = "windows_x86_64_msvc" -version = "0.36.1" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "yansi" From d7fbd99f918264ac01959e6b8970e1e54fe130e3 Mon Sep 17 00:00:00 2001 From: "Erwin J. van Eijk" <235739+erwinvaneijk@users.noreply.github.com> Date: Sun, 30 Jul 2023 21:17:55 +0200 Subject: [PATCH 3/7] Add extra test, accept licenses - Added dependencies which had different licenses than previously considered. - Added test for single result. --- deny.toml | 2 ++ tests/integration.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/deny.toml b/deny.toml index d40ba21..84f00df 100644 --- a/deny.toml +++ b/deny.toml @@ -19,7 +19,9 @@ unlicensed = "deny" allow = [ "Apache-2.0 WITH LLVM-exception", "Apache-2.0", + "BSD-3-Clause", "MIT", + "MPL-2.0", "Unicode-DFS-2016", "Unlicense", ] diff --git a/tests/integration.rs b/tests/integration.rs index 4ae00ba..c8023e5 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -75,6 +75,58 @@ testdata/testfile-yes.bin => (0) testdata\\testfile-zero-length\n\n", Ok(()) } +#[test] +fn test_testdata_integration_single_result() -> Result<(), Box> { + let dir = tempdir()?; + let output_state_file = dir.path().join("output_state_file.json"); + let database_file = dir.path().join("database.json"); + let paths = vec!["testdata"]; + let files = vec!["testdata/testfile-yes.bin"]; + let number_of_results = 1; + + let mut index_command = Command::cargo_bin("fbhash")?; + index_command + .arg("index") + .arg("--state") + .arg(output_state_file.clone()) + .arg("--database") + .arg(database_file.to_str().unwrap()) + .arg(paths[0]); + index_command.assert().success(); + + let mut query_command = Command::cargo_bin("fbhash")?; + query_command + .arg("query") + .arg(format!("-n={}", number_of_results)) + .arg("--database") + .arg(database_file.clone()) + .arg("--state") + .arg(output_state_file.clone()) + .arg(files[0]); + + #[cfg(not(target_os = "windows"))] + query_command.assert().success().stdout(format!( + "Similarities for {}\n\ +Results: 1\n\ +testdata/testfile-yes.bin => (0.9999999999999999) testdata/testfile-yes.bin\n\n", + files[0] + )); + + #[cfg(target_os = "windows")] + query_command.assert().success().stdout(format!( + "Similarities for {}\n\ +Results: 3\n\ +testdata/testfile-yes.bin => (0.9999999999999999) testdata\\testfile-yes.bin\n\ +testdata/testfile-yes.bin => (0) testdata\\testfile-zero.bin\n\ +testdata/testfile-yes.bin => (0) testdata\\testfile-zero-length\n\n", + files[0] + )); + + dir.close()?; + Ok(()) +} + + #[test] fn test_testdata_integration_binary() -> Result<(), Box> { let dir = tempdir()?; From 0d375a55119ab78435354c0a7b9807e6446547d1 Mon Sep 17 00:00:00 2001 From: "Erwin J. van Eijk" <235739+erwinvaneijk@users.noreply.github.com> Date: Sun, 30 Jul 2023 21:23:30 +0200 Subject: [PATCH 4/7] Fix clippy warning --- src/fbhash/similarities.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fbhash/similarities.rs b/src/fbhash/similarities.rs index 8dbc6cc..445b0fd 100644 --- a/src/fbhash/similarities.rs +++ b/src/fbhash/similarities.rs @@ -275,7 +275,6 @@ pub fn ranked_search( let mut result = Vec::new(); queue .into_sorted_iter() - .into_iter() .for_each(|(doc, similarity)| { result.push((similarity.into_inner(), doc.clone())); }); From 6a69b0cb288eceadbbbba80d401466c0ae959f59 Mon Sep 17 00:00:00 2001 From: "Erwin J. van Eijk" <235739+erwinvaneijk@users.noreply.github.com> Date: Sun, 30 Jul 2023 21:26:59 +0200 Subject: [PATCH 5/7] Fix extra test on ubuntu and mac --- tests/integration.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integration.rs b/tests/integration.rs index c8023e5..7e7605c 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -115,10 +115,8 @@ testdata/testfile-yes.bin => (0.9999999999999999) testdata/testfile-yes.bin\n\n" #[cfg(target_os = "windows")] query_command.assert().success().stdout(format!( "Similarities for {}\n\ -Results: 3\n\ -testdata/testfile-yes.bin => (0.9999999999999999) testdata\\testfile-yes.bin\n\ -testdata/testfile-yes.bin => (0) testdata\\testfile-zero.bin\n\ -testdata/testfile-yes.bin => (0) testdata\\testfile-zero-length\n\n", +Results: 1\n\ +testdata/testfile-yes.bin => (0.9999999999999999) testdata\\testfile-yes.bin\n\n", files[0] )); From 6e8f26b496253fac7f602d161e71d04626f6732c Mon Sep 17 00:00:00 2001 From: "Erwin J. van Eijk" <235739+erwinvaneijk@users.noreply.github.com> Date: Mon, 31 Jul 2023 00:24:36 +0200 Subject: [PATCH 6/7] Add a BinaryHeap based priority queue - add test for correct number of results --- .gitignore | 1 + Cargo.lock | 33 +++--------------------- Cargo.toml | 1 - deny.toml | 1 - src/fbhash/mod.rs | 2 +- src/fbhash/similarities.rs | 53 +++++++++++++++++++++++++------------- tests/integration.rs | 8 +++--- 7 files changed, 44 insertions(+), 55 deletions(-) diff --git a/.gitignore b/.gitignore index 25d0e25..a8a2060 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ database.json index.json more_database.yaml small_test.sh +state.json diff --git a/Cargo.lock b/Cargo.lock index fcc2865..b092b49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -319,13 +319,12 @@ dependencies = [ "env_logger", "float-cmp", "hash_hasher", - "hashbrown 0.14.0", + "hashbrown", "indicatif", "itertools 0.11.0", "ordered-float", "predicates", "pretty_assertions", - "priority-queue", "rayon", "serde", "serde_json", @@ -350,12 +349,6 @@ version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hashbrown" version = "0.14.0" @@ -380,16 +373,6 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - [[package]] name = "indexmap" version = "2.0.0" @@ -397,7 +380,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" dependencies = [ "equivalent", - "hashbrown 0.14.0", + "hashbrown", ] [[package]] @@ -607,16 +590,6 @@ dependencies = [ "yansi", ] -[[package]] -name = "priority-queue" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fff39edfcaec0d64e8d0da38564fad195d2d51b680940295fcc307366e101e61" -dependencies = [ - "autocfg", - "indexmap 1.9.3", -] - [[package]] name = "proc-macro2" version = "1.0.66" @@ -789,7 +762,7 @@ version = "0.9.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574" dependencies = [ - "indexmap 2.0.0", + "indexmap", "itoa", "ryu", "serde", diff --git a/Cargo.toml b/Cargo.toml index 2eb24f1..3664048 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,6 @@ bincode = "~1.3.3" hash_hasher = "~2.0.3" console = "~0.15.2" rayon = "1.7.0" -priority-queue = "1.2.3" ordered-float = "3.1.0" [dependencies.clap] diff --git a/deny.toml b/deny.toml index 84f00df..262150a 100644 --- a/deny.toml +++ b/deny.toml @@ -21,7 +21,6 @@ allow = [ "Apache-2.0", "BSD-3-Clause", "MIT", - "MPL-2.0", "Unicode-DFS-2016", "Unlicense", ] diff --git a/src/fbhash/mod.rs b/src/fbhash/mod.rs index 156fc19..8d9e934 100644 --- a/src/fbhash/mod.rs +++ b/src/fbhash/mod.rs @@ -6,4 +6,4 @@ pub mod index; pub mod query; -pub mod utils; +pub mod utils; \ No newline at end of file diff --git a/src/fbhash/similarities.rs b/src/fbhash/similarities.rs index 445b0fd..49e7f79 100644 --- a/src/fbhash/similarities.rs +++ b/src/fbhash/similarities.rs @@ -18,13 +18,13 @@ // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +use std::cmp::{Ordering, Reverse}; use crate::fbhash::chunker::ChunkIterator; use hashbrown::HashMap; use indicatif::ProgressBar; use ordered_float::OrderedFloat; -use priority_queue::PriorityQueue; use serde::{Deserialize, Serialize}; -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::{BTreeMap, BTreeSet, BinaryHeap}; use std::fs::File; use std::io; @@ -254,31 +254,48 @@ impl std::hash::Hash for DocumentCollection { } } +struct DocumentScore { + score: OrderedFloat, + document: Document, +} + +impl Eq for DocumentScore {} + +impl Ord for DocumentScore { + fn cmp(&self, other: &Self) -> Ordering { + self.score.partial_cmp(&other.score).unwrap() + } +} + +impl PartialEq for DocumentScore { + fn eq(&self, other: &Self) -> bool { + self.score == other.score + } +} + +impl PartialOrd for DocumentScore { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + pub fn ranked_search( doc: &[(u64, f64)], documents: &[Document], k: usize, progress: &ProgressBar, ) -> Vec<(f64, Document)> { - let mut queue = PriorityQueue::with_capacity(k); + let mut queue: BinaryHeap> = BinaryHeap::with_capacity(documents.len()); documents .iter() - .map(|other_doc| { - (other_doc, { - progress.inc(1); - OrderedFloat(cosine_similarity(&other_doc.digest, doc)) - }) - }) - .for_each(|(d, score)| { - queue.push(d, score); - }); - let mut result = Vec::new(); - queue - .into_sorted_iter() - .for_each(|(doc, similarity)| { - result.push((similarity.into_inner(), doc.clone())); + .for_each(|other_doc| { + queue.push(Reverse(DocumentScore { + score: OrderedFloat(cosine_similarity(&other_doc.digest, doc)), + document: other_doc.clone(), + })); + progress.inc(1); }); - result + queue.into_sorted_vec().into_iter().take(k).map(|doc_score| (doc_score.0.score.0, doc_score.0.document)).collect::>() } // diff --git a/tests/integration.rs b/tests/integration.rs index 7e7605c..9f6edff 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -107,17 +107,17 @@ fn test_testdata_integration_single_result() -> Result<(), Box (0.9999999999999999) testdata/testfile-yes.bin\n\n", - files[0] + files[0], number_of_results )); #[cfg(target_os = "windows")] query_command.assert().success().stdout(format!( "Similarities for {}\n\ -Results: 1\n\ +Results: {}\n\ testdata/testfile-yes.bin => (0.9999999999999999) testdata\\testfile-yes.bin\n\n", - files[0] + files[0], number_of_results )); dir.close()?; From cb59e5aed06aecd6122fab431586c560a9a0af9c Mon Sep 17 00:00:00 2001 From: "Erwin J. van Eijk" <235739+erwinvaneijk@users.noreply.github.com> Date: Mon, 31 Jul 2023 00:38:35 +0200 Subject: [PATCH 7/7] Move progress update to after progress --- src/fbhash/index.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fbhash/index.rs b/src/fbhash/index.rs index 2f260d2..b3c7229 100644 --- a/src/fbhash/index.rs +++ b/src/fbhash/index.rs @@ -207,8 +207,9 @@ pub fn index_paths( ); } + let res = write_database_state(&updated_results, results_file, config); progress_bar.finish_and_clear(); - write_database_state(&updated_results, results_file, config) + res } #[cfg(test)]