Skip to content

Commit

Permalink
working_copy: Only serialise clock if only clock changed
Browse files Browse the repository at this point in the history
When the working copy is identical to the last snapshot - which is quite
often - only the `watchman_clock` changes. When this happens, the
_entirety_ of the working copy proto is reserialised, including the O(n)
`file_states` field, which is slow.

As non-repeated proto fields always take the last field in the
serialised message ([1]), we can skip the serialisation of the whole
working copy by only serialising the fields that have changed and
appending it to the last serialised proto.

This speeds up most `jj` commands by ~50ms on a large repository:

```
$ hyperfine --sort command --warmup 3 --runs 20 -L bin jj-before,jj-after \
  "target/release/{bin} -R ~/chromiumjj/src show -s @"
Benchmark 1: target/release/jj-before -R ~/chromiumjj/src show -s @
  Time (mean ± σ):     401.5 ms ±   4.5 ms    [User: 227.2 ms, System: 172.9 ms]
  Range (min … max):   394.3 ms … 414.2 ms    20 runs

Benchmark 2: target/release/jj-after -R ~/chromiumjj/src show -s @
  Time (mean ± σ):     347.3 ms ±   4.6 ms    [User: 179.3 ms, System: 166.4 ms]
  Range (min … max):   340.7 ms … 358.0 ms    20 runs

Relative speed comparison
        1.16 ±  0.02  target/release/jj-before -R ~/chromiumjj/src show -s @
        1.00          target/release/jj-after -R ~/chromiumjj/src show -s @
```

[1]: https://protobuf.dev/programming-guides/encoding/#last-one-wins
  • Loading branch information
mlcui-corp committed Jun 20, 2024
1 parent 31b8f07 commit ec52037
Showing 1 changed file with 107 additions and 21 deletions.
128 changes: 107 additions & 21 deletions lib/src/local_working_copy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::any::Any;
use std::collections::HashSet;
use std::error::Error;
use std::fs::{File, Metadata, OpenOptions};
use std::io::{Read, Write};
use std::io::{Read, Seek, Write};
use std::ops::Range;
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
Expand Down Expand Up @@ -501,6 +501,12 @@ pub enum TreeStateError {
path: PathBuf,
source: std::io::Error,
},
#[error("Copying tree state from {read_path} to temporary file {write_path}")]
CopyTreeState {
read_path: PathBuf,
write_path: PathBuf,
source: std::io::Error,
},
#[error("Persisting tree state to file {path}")]
PersistTreeState {
path: PathBuf,
Expand All @@ -510,6 +516,13 @@ pub enum TreeStateError {
Fsmonitor(#[source] Box<dyn Error + Send + Sync>),
}

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub enum TreeStateChanged {
Clean,
DirtyOnlyClock,
Dirty,
}

impl TreeState {
pub fn working_copy_path(&self) -> &Path {
&self.working_copy_path
Expand Down Expand Up @@ -671,6 +684,56 @@ impl TreeState {
Ok(())
}

fn save_only_clock(&mut self) -> Result<(), TreeStateError> {
let mut temp_file = NamedTempFile::new_in(&self.state_path).unwrap();
let target_path = self.state_path.join("tree_state");

// If the same non-repeated field appears multiple times, only the last will be used:
// https://protobuf.dev/programming-guides/encoding/#last-one-wins
// Reuse the existing serialisation of the proto in the current state to avoid
// serialising non-changed fields.
let bytes_written = std::fs::copy(&target_path, temp_file.path()).map_err(|err| {
TreeStateError::CopyTreeState {
read_path: target_path.clone(),
write_path: temp_file.path().to_owned(),
source: err,
}
})?;

let file = temp_file.as_file_mut();
file.seek(std::io::SeekFrom::Start(bytes_written))
.map_err(|err| TreeStateError::WriteTreeState {
path: self.state_path.clone(),
source: err,
})?;

let proto = crate::protos::working_copy::TreeState {
watchman_clock: self.watchman_clock.clone(),
..Default::default()
};

// TODO: Deduplicate this with `save()` above.
file.write_all(&proto.encode_to_vec())
.map_err(|err| TreeStateError::WriteTreeState {
path: self.state_path.clone(),
source: err,
})?;
// update own write time while we before we rename it, so we know
// there is no unknown data in it
self.update_own_mtime();
// TODO: Retry if persisting fails (it will on Windows if the file happened to
// be open for read).
temp_file
.persist(&target_path)
.map_err(|tempfile::PersistError { error, file: _ }| {
TreeStateError::PersistTreeState {
path: target_path.clone(),
source: error,
}
})?;
Ok(())
}

fn current_tree(&self) -> BackendResult<MergedTree> {
self.store.get_root_tree(&self.tree_id)
}
Expand Down Expand Up @@ -742,7 +805,7 @@ impl TreeState {
/// Look for changes to the working copy. If there are any changes, create
/// a new tree from it and return it, and also update the dirstate on disk.
#[instrument(skip_all)]
pub fn snapshot(&mut self, options: SnapshotOptions) -> Result<bool, SnapshotError> {
pub fn snapshot(&mut self, options: SnapshotOptions) -> Result<TreeStateChanged, SnapshotError> {
let SnapshotOptions {
base_ignores,
fsmonitor_settings,
Expand All @@ -753,7 +816,12 @@ impl TreeState {
let sparse_matcher = self.sparse_matcher();

let fsmonitor_clock_needs_save = fsmonitor_settings != FsmonitorSettings::None;
let mut is_dirty = fsmonitor_clock_needs_save;
let clean_tree_retval = if fsmonitor_clock_needs_save {
TreeStateChanged::DirtyOnlyClock
} else {
TreeStateChanged::Clean
};
let mut is_dirty = false;
let FsmonitorMatcher {
matcher: fsmonitor_matcher,
watchman_clock,
Expand All @@ -767,7 +835,7 @@ impl TreeState {
if matcher.visit(RepoPath::root()).is_nothing() {
// No need to iterate file states to build empty deleted_files.
self.watchman_clock = watchman_clock;
return Ok(is_dirty);
return Ok(clean_tree_retval);
}

let (tree_entries_tx, tree_entries_rx) = channel();
Expand Down Expand Up @@ -850,7 +918,11 @@ impl TreeState {
assert_eq!(state_paths, tree_paths);
}
self.watchman_clock = watchman_clock;
Ok(is_dirty)
Ok(if is_dirty {
TreeStateChanged::Dirty
} else {
clean_tree_retval
})
}

#[allow(clippy::too_many_arguments)]
Expand Down Expand Up @@ -1554,7 +1626,7 @@ impl WorkingCopy for LocalWorkingCopy {
lock,
old_operation_id,
old_tree_id,
tree_state_dirty: false,
tree_state_dirty: TreeStateChanged::Clean,
}))
}
}
Expand Down Expand Up @@ -1740,7 +1812,7 @@ pub struct LockedLocalWorkingCopy {
lock: FileLock,
old_operation_id: OperationId,
old_tree_id: MergedTreeId,
tree_state_dirty: bool,
tree_state_dirty: TreeStateChanged,
}

impl LockedWorkingCopy for LockedLocalWorkingCopy {
Expand Down Expand Up @@ -1768,7 +1840,7 @@ impl LockedWorkingCopy for LockedLocalWorkingCopy {
message: "Failed to read the working copy state".to_string(),
err: err.into(),
})?;
self.tree_state_dirty |= tree_state.snapshot(options)?;
self.tree_state_dirty = self.tree_state_dirty.max(tree_state.snapshot(options)?);
Ok(tree_state.current_tree_id().clone())
}

Expand All @@ -1784,7 +1856,7 @@ impl LockedWorkingCopy for LockedLocalWorkingCopy {
err: err.into(),
})?
.check_out(&new_tree)?;
self.tree_state_dirty = true;
self.tree_state_dirty = TreeStateChanged::Dirty;
Ok(stats)
}

Expand All @@ -1798,7 +1870,7 @@ impl LockedWorkingCopy for LockedLocalWorkingCopy {
})?
.reset(&new_tree)
.block_on()?;
self.tree_state_dirty = true;
self.tree_state_dirty = TreeStateChanged::Dirty;
Ok(())
}

Expand All @@ -1812,7 +1884,7 @@ impl LockedWorkingCopy for LockedLocalWorkingCopy {
})?
.recover(&new_tree)
.block_on()?;
self.tree_state_dirty = true;
self.tree_state_dirty = TreeStateChanged::Dirty;
Ok(())
}

Expand All @@ -1834,7 +1906,7 @@ impl LockedWorkingCopy for LockedLocalWorkingCopy {
err: err.into(),
})?
.set_sparse_patterns(new_sparse_patterns)?;
self.tree_state_dirty = true;
self.tree_state_dirty = TreeStateChanged::Dirty;
Ok(stats)
}

Expand All @@ -1843,15 +1915,29 @@ impl LockedWorkingCopy for LockedLocalWorkingCopy {
mut self: Box<Self>,
operation_id: OperationId,
) -> Result<Box<dyn WorkingCopy>, WorkingCopyStateError> {
assert!(self.tree_state_dirty || &self.old_tree_id == self.wc.tree_id()?);
if self.tree_state_dirty {
self.wc
.tree_state_mut()?
.save()
.map_err(|err| WorkingCopyStateError {
message: "Failed to write working copy state".to_string(),
err: Box::new(err),
assert!(
self.tree_state_dirty == TreeStateChanged::Dirty
|| &self.old_tree_id == self.wc.tree_id()?
);
match self.tree_state_dirty {
TreeStateChanged::Clean => (),
TreeStateChanged::DirtyOnlyClock => {
self.wc.tree_state_mut()?.save_only_clock().map_err(|err| {
WorkingCopyStateError {
message: "Failed to write working copy clock".to_string(),
err: Box::new(err),
}
})?;
}
TreeStateChanged::Dirty => {
self.wc
.tree_state_mut()?
.save()
.map_err(|err| WorkingCopyStateError {
message: "Failed to write working copy state".to_string(),
err: Box::new(err),
})?;
}
}
if self.old_operation_id != operation_id {
self.wc.checkout_state_mut().operation_id = operation_id;
Expand All @@ -1871,7 +1957,7 @@ impl LockedLocalWorkingCopy {
err: err.into(),
})?
.reset_watchman();
self.tree_state_dirty = true;
self.tree_state_dirty = self.tree_state_dirty.max(TreeStateChanged::DirtyOnlyClock);
Ok(())
}
}
Expand Down

0 comments on commit ec52037

Please sign in to comment.