diff --git a/CHANGELOG-foldiff.md b/CHANGELOG-foldiff.md index 254e663..ff3f154 100644 --- a/CHANGELOG-foldiff.md +++ b/CHANGELOG-foldiff.md @@ -6,11 +6,12 @@ - replace `anyhow` with custom error types - write custom threading utilities -## pending +## 1.3.0 - `foldiff upgrade` - upgrade older manifests to new ones - move core `foldiff` functionality to `libfoldiff` * significant refactors * decouple logic from `indicatif` and `cliutils` +- use reflinks when copying unchanged files to reduce disk usage on filesystems such as btrfs, xfs, and apfs ## 1.2.0 - switch to FLDF v1.1.0 diff --git a/Cargo.lock b/Cargo.lock index 835b5db..f02691c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -253,7 +253,7 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "foldiff" -version = "1.2.0" +version = "1.3.0" dependencies = [ "anyhow", "clap", @@ -347,7 +347,7 @@ checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" [[package]] name = "libfoldiff" -version = "1.0.0" +version = "1.3.0" dependencies = [ "anyhow", "countio", @@ -356,6 +356,7 @@ dependencies = [ "memmap2", "rand", "rayon", + "reflink", "rmp-serde", "serde", "serde_bytes", @@ -505,6 +506,16 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "reflink" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc585ec28b565b4c28977ce8363a6636cedc280351ba25a7915f6c9f37f68cbe" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "rmp" version = "0.8.14" @@ -689,6 +700,28 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-sys" version = "0.52.0" diff --git a/foldiff/Cargo.toml b/foldiff/Cargo.toml index 942af03..def68ab 100644 --- a/foldiff/Cargo.toml +++ b/foldiff/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "foldiff" authors = ["Hazel Atkinson"] -version = "1.2.0" +version = "1.3.0" edition = "2021" license-file = "../LICENSE.md" description = "A general purpose diffing tool that operates on folders of mixed text/binary files." diff --git a/foldiff/src/main.rs b/foldiff/src/main.rs index 999945c..5c2145d 100644 --- a/foldiff/src/main.rs +++ b/foldiff/src/main.rs @@ -9,9 +9,9 @@ mod cliutils; #[derive(Parser, Debug)] #[command( - version = "v1.2.0", + version = "v1.3.0", about, - long_version = "v1.2.0 + long_version = "v1.3.0 writing fldf v1.1.0 reading fldf 1.0.0-r, v1.1.0" )] diff --git a/libfoldiff/Cargo.toml b/libfoldiff/Cargo.toml index 7cc03c4..f95f752 100644 --- a/libfoldiff/Cargo.toml +++ b/libfoldiff/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "libfoldiff" authors = ["Hazel Atkinson"] -version = "1.0.0" +version = "1.3.0" edition = "2021" license-file = "../LICENSE.md" description = "A general purpose diffing library for the FDLF format." @@ -18,6 +18,7 @@ twox-hash = "1.6.3" zstd = { version = "0.13.2", features = ["zstdmt"] } rayon = "1.10.0" memmap2 = "0.9.4" +reflink = "0.1.3" [dev-dependencies] tempfile = "3.12.0" diff --git a/libfoldiff/src/applying.rs b/libfoldiff/src/applying.rs index d75d557..9640568 100644 --- a/libfoldiff/src/applying.rs +++ b/libfoldiff/src/applying.rs @@ -70,16 +70,30 @@ impl ApplyingDiff { self.manifest.untouched_files .par_iter() .filter_map(|(h, p)| { - // std::fs::copy would be faster, but we want to verify the hash - let mut src = handle_res_parit!(File::open(self.old_root.join(p)), "Failed to open file to copy from {}", p); - let mut dst = handle_res_parit!(create_file(&self.new_root.join(p)), "Failed to create file to copy to {}", p); - - let mut hw = hash::XXHashStreamer::new(&mut dst); - handle_res_parit!(std::io::copy(&mut src, &mut hw), "Failed to copy file {}", p); - - let rh = hw.finish(); - if rh != *h { - return Some(anyhow!("Found {p} was different to expected (hash was {rh}, not {})", *h)); + let h = *h; + let old_path = self.old_root.join(p); + let new_path = self.new_root.join(p); + + let real_hash = + // if we're on *nix, try reflinking + if cfg!(unix) && reflink::reflink(&old_path, &new_path).is_ok() { + // reflinked, check the hash + handle_res_parit!(hash::hash_file(&old_path), "Failed to hash file copied from {}", p) + } + else { + // reflink failed or we're on windows, copy + // copying in kernel space would be slightly faster but we have to check the hash + let mut src = handle_res_parit!(File::open(&old_path), "Failed to open file to copy from {}", p); + let mut dst = handle_res_parit!(create_file(&new_path), "Failed to create file to copy to {}", p); + + let mut hw = hash::XXHashStreamer::new(&mut dst); + handle_res_parit!(std::io::copy(&mut src, &mut hw), "Failed to copy file {}", p); + + hw.finish() + }; + + if real_hash != h { + return Some(anyhow!("Found {p} was different to expected (hash was {real_hash}, not {})", h)); } inc(&bar_untouched); @@ -143,6 +157,7 @@ impl ApplyingDiff { let len = u64::from_be_bytes(*diff_map[blob..].first_chunk().unwrap()) as usize; let blob = blob + 8; // advance past length + // TODO: reflink // copy let mut read = Cursor::new(&diff_map[blob..(blob + len)]); let f = handle_res_parit!(create_file(&self.new_root.join(p)), "Failed to create new file {p} to write to");