From dbb86eb383fcc91e897bb36045ed821667c94c43 Mon Sep 17 00:00:00 2001 From: Patrick Casey Date: Tue, 26 Nov 2024 10:27:42 -0500 Subject: [PATCH] feat: use libgit2 for parsing git information in mitre/git plugin --- Cargo.lock | 1 + plugins/git/Cargo.toml | 1 + plugins/git/src/data.rs | 101 ++- plugins/git/src/main.rs | 131 +-- plugins/git/src/parse.rs | 1276 --------------------------- plugins/git/src/util/command.rs | 38 - plugins/git/src/util/git_command.rs | 260 +++--- plugins/git/src/util/mod.rs | 1 - 8 files changed, 281 insertions(+), 1528 deletions(-) delete mode 100644 plugins/git/src/parse.rs delete mode 100644 plugins/git/src/util/command.rs diff --git a/Cargo.lock b/Cargo.lock index b902b8c3..fc27cef5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1111,6 +1111,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", + "git2", "hipcheck-sdk", "jiff", "log", diff --git a/plugins/git/Cargo.toml b/plugins/git/Cargo.toml index 5050c0ea..d1091182 100644 --- a/plugins/git/Cargo.toml +++ b/plugins/git/Cargo.toml @@ -8,6 +8,7 @@ publish = false [dependencies] anyhow = "1.0.91" clap = { version = "4.5.21", features = ["derive"] } +git2 = "0.19.0" hipcheck-sdk = { path = "../../sdk/rust", features = ["macros"]} jiff = { version = "0.1.14", features = ["serde"] } log = "0.4.22" diff --git a/plugins/git/src/data.rs b/plugins/git/src/data.rs index 7e475785..689a33e5 100644 --- a/plugins/git/src/data.rs +++ b/plugins/git/src/data.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 use hipcheck_sdk::types::LocalGitRepo; +use jiff::Timestamp; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::{ @@ -21,7 +22,7 @@ pub struct DetailedGitRepo { } /// Commits as they come directly out of `git log`. 
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] pub struct RawCommit { pub hash: String, @@ -32,6 +33,29 @@ pub struct RawCommit { pub committed_on: Result, } +impl From> for RawCommit { + fn from(value: git2::Commit<'_>) -> Self { + let hash = value.id().to_string(); + let author = &value.author(); + let committer = &value.committer(); + + let written_time_sec_since_epoch = jiff::Timestamp::from_second( + author.when().seconds() + (author.when().offset_minutes() as i64 * 60), + ); + let commit_time_sec_since_epoch = jiff::Timestamp::from_second( + committer.when().seconds() + (committer.when().offset_minutes() as i64 * 60), + ); + + RawCommit { + hash, + author: author.into(), + written_on: jiff_timestamp_result_to_string(written_time_sec_since_epoch), + committer: committer.into(), + committed_on: jiff_timestamp_result_to_string(commit_time_sec_since_epoch), + } + } +} + /// Commits as understood in Hipcheck's data model. /// The `written_on` and `committed_on` datetime fields contain Strings that are created from `jiff:Timestamps`. /// Because `Timestamp` does not `impl JsonSchema`, we display the datetimes as Strings for passing out of this plugin. 
@@ -45,6 +69,47 @@ pub struct Commit { pub committed_on: Result, } +fn jiff_timestamp_result_to_string( + timestamp_res: Result, +) -> Result { + match timestamp_res { + Ok(timestamp) => Ok(timestamp.to_string()), + Err(e) => Err(format!( + "Error converting commit author time to Timestamp: {}", + e + )), + } +} + +impl From> for Commit { + fn from(value: git2::Commit) -> Self { + let author = &value.author(); + let committer = &value.committer(); + let written_on = jiff::Timestamp::from_second( + author.when().seconds() + (author.when().offset_minutes() as i64 * 60), + ); + let committed_on = jiff::Timestamp::from_second( + committer.when().seconds() + (committer.when().offset_minutes() as i64 * 60), + ); + + Self { + hash: value.id().to_string(), + written_on: jiff_timestamp_result_to_string(written_on), + committed_on: jiff_timestamp_result_to_string(committed_on), + } + } +} + +impl From for Commit { + fn from(value: RawCommit) -> Self { + Self { + hash: value.hash, + written_on: value.written_on.map(|x| x.to_string()), + committed_on: value.committed_on.map(|x| x.to_string()), + } + } +} + impl Display for Commit { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{}", self.hash) @@ -60,6 +125,15 @@ pub struct Contributor { pub email: String, } +impl From<&git2::Signature<'_>> for Contributor { + fn from(value: &git2::Signature) -> Self { + Self { + name: value.name().unwrap_or_default().to_string(), + email: value.email().unwrap_or_default().to_string(), + } + } +} + impl Display for Contributor { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{} <{}>", self.name, self.email) @@ -98,6 +172,12 @@ pub struct CommitDiff { pub diff: Diff, } +impl CommitDiff { + pub fn new(commit: Commit, diff: Diff) -> Self { + Self { commit, diff } + } +} + impl Display for CommitDiff { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!( @@ -130,7 +210,22 @@ pub struct Diff { #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] pub
struct FileDiff { pub file_name: Arc, - pub additions: Option, - pub deletions: Option, + pub additions: i64, + pub deletions: i64, pub patch: String, } + +impl FileDiff { + pub fn increment_additions(&mut self, amount: i64) { + self.additions += amount + } + + pub fn increment_deletions(&mut self, amount: i64) { + self.deletions += amount + } + + pub fn add_to_patch(&mut self, contents: &[u8]) { + let contents = String::from_utf8_lossy(contents); + self.patch.push_str(&contents); + } +} diff --git a/plugins/git/src/main.rs b/plugins/git/src/main.rs index ab31f0d4..380f7d1d 100644 --- a/plugins/git/src/main.rs +++ b/plugins/git/src/main.rs @@ -3,7 +3,6 @@ //! Plugin containing secondary queries that return information about a Git repo to another query mod data; -mod parse; mod util; use crate::{ @@ -11,12 +10,14 @@ use crate::{ Commit, CommitContributor, CommitContributorView, CommitDiff, Contributor, ContributorView, DetailedGitRepo, Diff, RawCommit, }, - util::git_command::{get_commits, get_commits_from_date, get_diffs}, + util::git_command::{get_commits_from_date, get_diffs, get_raw_commits}, }; use clap::Parser; use hipcheck_sdk::{prelude::*, types::LocalGitRepo}; +use jiff::Timestamp; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use util::git_command::{get_commit_diffs, get_commits}; /// A locally stored git repo, with a list of additional details /// The details will vary based on the query (e.g.
a date, a committer e-mail address, a commit hash) @@ -34,7 +35,7 @@ pub struct BatchGitRepo { /// Returns all raw commits extracted from the repository fn local_raw_commits(repo: LocalGitRepo) -> Result> { - get_commits(&repo.path).map_err(|e| { + get_raw_commits(&repo.path).map_err(|e| { log::error!("failed to get raw commits: {}", e); Error::UnspecifiedQueryState }) @@ -45,7 +46,7 @@ fn local_raw_commits(repo: LocalGitRepo) -> Result> { #[query] async fn last_commit_date(_engine: &mut PluginEngine, repo: LocalGitRepo) -> Result { let path = &repo.path; - let commits = get_commits(path).map_err(|e| { + let commits = get_raw_commits(path).map_err(|e| { log::error!("failed to get raw commits: {}", e); Error::UnspecifiedQueryState })?; @@ -55,10 +56,11 @@ async fn last_commit_date(_engine: &mut PluginEngine, repo: LocalGitRepo) -> Res Error::UnspecifiedQueryState })?; - first.written_on.clone().map_err(|e| { - log::error!("{}", e); - Error::UnspecifiedQueryState - }) + Ok(first + .written_on + .clone() + .map(|x| x.to_string()) + .unwrap_or_else(|e| e)) } /// Returns all diffs extracted from the repository @@ -76,19 +78,10 @@ async fn diffs(_engine: &mut PluginEngine, repo: LocalGitRepo) -> Result Result> { let path = &repo.path; - let raw_commits = get_commits(path).map_err(|e| { + let commits = get_commits(path).map_err(|e| { log::error!("failed to get raw commits: {}", e); Error::UnspecifiedQueryState })?; - let commits = raw_commits - .iter() - .map(|raw| Commit { - hash: raw.hash.to_owned(), - written_on: raw.written_on.to_owned(), - committed_on: raw.committed_on.to_owned(), - }) - .collect(); - Ok(commits) } @@ -108,34 +101,29 @@ async fn commits_from_date( } }; // The called function will return an error if the date is not formatted correctly, so we do not need to check for ahead of time - let raw_commits_from_date = get_commits_from_date(path, &date).map_err(|e| { + let timestamp: Timestamp = date.parse().map_err(|_| { + log::error!("Error parsing provided 
date '{}' as a timestamp", date); + Error::UnspecifiedQueryState + })?; + let commits_from_data = get_commits_from_date(path, timestamp).map_err(|e| { log::error!("failed to get raw commits from date: {}", e); Error::UnspecifiedQueryState })?; - let commits = raw_commits_from_date - .iter() - .map(|raw| Commit { - hash: raw.hash.to_owned(), - written_on: raw.written_on.to_owned(), - committed_on: raw.committed_on.to_owned(), - }) - .collect(); - - Ok(commits) + Ok(commits_from_data) } /// Returns all contributors to the repository #[query] async fn contributors(_engine: &mut PluginEngine, repo: LocalGitRepo) -> Result> { let path = &repo.path; - let raw_commits = get_commits(path).map_err(|e| { + let raw_commits = get_raw_commits(path).map_err(|e| { log::error!("failed to get raw commits: {}", e); Error::UnspecifiedQueryState })?; - let mut contributors: Vec<_> = raw_commits - .iter() - .flat_map(|raw| [raw.author.to_owned(), raw.committer.to_owned()]) + let mut contributors: Vec = raw_commits + .into_iter() + .flat_map(|raw| [raw.author, raw.committer]) .collect(); contributors.sort(); @@ -146,33 +134,12 @@ async fn contributors(_engine: &mut PluginEngine, repo: LocalGitRepo) -> Result< /// Returns all commit-diff pairs #[query] -async fn commit_diffs(engine: &mut PluginEngine, repo: LocalGitRepo) -> Result> { - let commits = commits(engine, repo.clone()).await.map_err(|e| { - log::error!("failed to get commits: {}", e); - Error::UnspecifiedQueryState - })?; - let diffs = diffs(engine, repo).await.map_err(|e| { - log::error!("failed to get diffs: {}", e); +async fn commit_diffs(_engine: &mut PluginEngine, repo: LocalGitRepo) -> Result> { + let diffs = get_commit_diffs(&repo.path).map_err(|e| { + log::error!("failed to get commit_diffs: {}", e); Error::UnspecifiedQueryState })?; - - if commits.len() != diffs.len() { - log::error!( - "parsed {} diffs but there are {} commits", - diffs.len(), - commits.len() - ); - return Err(Error::UnspecifiedQueryState); - } - - let
commit_diffs = Iterator::zip(commits.iter(), diffs.iter()) - .map(|(commit, diff)| CommitDiff { - commit: commit.clone(), - diff: diff.clone(), - }) - .collect(); - - Ok(commit_diffs) + Ok(diffs) } /// Returns the commits associated with a given contributor (identified by e-mail address in the `details` value) @@ -258,11 +225,7 @@ async fn batch_commits_for_contributor( })?; let commits: Vec = raw_commits .iter() - .map(|raw| Commit { - hash: raw.hash.to_owned(), - written_on: raw.written_on.to_owned(), - committed_on: raw.committed_on.to_owned(), - }) + .map(|raw_commit| Commit::from(raw_commit.clone())) .collect(); // @Assert - raw_commit and commits idxes correspond @@ -401,11 +364,7 @@ async fn batch_contributors_for_commit( .into_iter() .enumerate() .map(|(i, raw)| { - let commit = Commit { - hash: raw.hash.to_owned(), - written_on: raw.written_on.to_owned(), - committed_on: raw.committed_on.to_owned(), - }; + let commit = Commit::from(raw.clone()); let author = raw.author; let committer = raw.committer; hash_to_idx.insert(raw.hash.clone(), i); @@ -438,9 +397,10 @@ async fn commit_contributors( let path = &repo.path; let contributors = contributors(engine, repo.clone()).await.map_err(|e| { log::error!("failed to get contributors: {}", e); + Error::UnspecifiedQueryState })?; - let raw_commits = get_commits(path).map_err(|e| { + let raw_commits = get_raw_commits(path).map_err(|e| { log::error!("failed to get raw commits: {}", e); Error::UnspecifiedQueryState })?; @@ -502,36 +462,3 @@ async fn main() -> Result<()> { let args = Args::try_parse().unwrap(); PluginServer::register(GitPlugin {}).listen(args.port).await } - -#[cfg(test)] -mod test { - #[test] - fn test_no_newline_before_end_of_chunk() { - let input = "diff --git a/plugins/review/plugin.kdl b/plugins/review/plugin.kdl\nindex 83f0355..9fa8e47 100644\n--- a/plugins/review/plugin.kdl\n+++ b/plugins/review/plugin.kdl\n@@ -6,4 +6,4 @@ entrypoint {\n- on arch=\"aarch64-apple-darwin\" \"./hc-mitre-review\"\n- 
on arch=\"x86_64-apple-darwin\" \"./hc-mitre-review\"\n- on arch=\"x86_64-unknown-linux-gnu\" \"./hc-mitre-review\"\n- on arch=\"x86_64-pc-windows-msvc\" \"./hc-mitre-review\"\n+ on arch=\"aarch64-apple-darwin\" \"./target/debug/review_sdk\"\n+ on arch=\"x86_64-apple-darwin\" \"./target/debug/review_sdk\"\n+ on arch=\"x86_64-unknown-linux-gnu\" \"./target/debug/review_sdk\"\n+ on arch=\"x86_64-pc-windows-msvc\" \"./target/debug/review_sdk\"\n@@ -14 +14 @@ dependencies {\n-}\n\\ No newline at end of file\n+}\n"; - - let (leftover, _parsed) = crate::parse::patch(input).unwrap(); - assert!(leftover.is_empty()); - } - - #[test] - fn test_hyphens_in_diff_stats() { - let input = "0\t4\tsite/content/_index.md\n136\t2\tsite/content/install/_index.md\n-\t-\tsite/static/images/homepage-bg.png\n2\t2\tsite/tailwind.config.js\n2\t0\tsite/templates/bases/base.tera.html\n82\t1\tsite/templates/index.html\n3\t3\tsite/templates/shortcodes/info.html\n15\t14\txtask/src/task/site/serve.rs\n"; - let (leftover, _) = crate::parse::stats(input).unwrap(); - assert!(leftover.is_empty()); - } - - #[test] - fn test_patch_with_only_meta() { - let input = "diff --git a/hipcheck/src/analysis/session/spdx.rs b/hipcheck/src/session/spdx.rs\nsimilarity index 100%\nrename from hipcheck/src/analysis/session/spdx.rs\nrename to hipcheck/src/session/spdx.rs\n"; - let (leftover, _) = crate::parse::patch(input).unwrap(); - assert!(leftover.is_empty()); - } - - #[test] - fn test_patch_without_triple_plus_minus() { - let input = "~~~\n\n0\t0\tmy_test_.py\n\ndiff --git a/my_test_.py b/my_test_.py\ndeleted file mode 100644\nindex e69de29bb2..0000000000\n~~~\n\n33\t3\tnumpy/_core/src/umath/string_fastsearch.h\n\ndiff --git a/numpy/_core/src/umath/string_fastsearch.h b/numpy/_core/src/umath/string_fastsearch.h\nindex 2a778bb86f..1f2d47e8f1 100644\n--- a/numpy/_core/src/umath/string_fastsearch.h\n+++ b/numpy/_core/src/umath/string_fastsearch.h\n@@ -35,0 +36 @@\n+ * @internal\n"; - let (leftover, diffs) = 
crate::parse::diffs(input).unwrap(); - assert!(leftover.is_empty()); - assert!(diffs.len() == 2); - } -} diff --git a/plugins/git/src/parse.rs b/plugins/git/src/parse.rs deleted file mode 100644 index 6ce4b1aa..00000000 --- a/plugins/git/src/parse.rs +++ /dev/null @@ -1,1276 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -#![allow(dead_code)] - -use crate::data::{Contributor, Diff, FileDiff, RawCommit}; -use anyhow::{Context as _, Error, Result}; -use jiff::Timestamp; -use nom::{ - branch::alt, - bytes::complete::tag, - character::complete::{char as character, digit1, newline, not_line_ending, one_of, space1}, - combinator::{map, opt, peek, recognize}, - error::{Error as NomError, ErrorKind}, - multi::{fold_many0, many0, many1, many_m_n}, - sequence::{preceded, terminated, tuple}, - IResult, -}; -use std::{iter::Iterator, result::Result as StdResult, sync::Arc}; - -const HEX_CHARS: &str = "0123456789abcdef"; -const GIT_HASH_MIN_LEN: usize = 5; -const GIT_HASH_MAX_LEN: usize = 40; - -/// Parse a complete git log. -pub fn git_log(input: &str) -> Result> { - let (_, commits) = commits(input) - .map_err(|e| Error::msg(e.to_string())) - .context("can't parse git log")?; - log::trace!("parsed git commits [commits='{:#?}']", commits); - Ok(commits) -} - -/// Parse a complete set of git diffs. -pub fn git_diff(input: &str) -> Result> { - let (_, diffs) = diffs(input) - .map_err(|e| Error::msg(e.to_string())) - .context("can't parse git diff")?; - log::trace!("parsed git diffs [diffs='{:#?}']", diffs); - Ok(diffs) -} - -/// Parse a complete set of GitHub diffs. 
-pub fn github_diff(input: &str) -> Result> { - let (_, diffs) = gh_diffs(input) - .map_err(|e| Error::msg(e.to_string())) - .context("can't parse GitHub diff")?; - log::trace!("parsed GitHub diffs [diffs='{:#?}']", diffs); - Ok(diffs) -} - -fn hash(input: &str) -> IResult<&str, &str> { - recognize(many_m_n(GIT_HASH_MIN_LEN, GIT_HASH_MAX_LEN, hex_char))(input) -} - -fn hex_char(input: &str) -> IResult<&str, char> { - one_of(HEX_CHARS)(input) -} - -fn date(input: &str) -> StdResult { - let ts: StdResult = input.parse().map_err(|e| { - format!( - "Could not parse git commit timestamp as RFC3339: '{}'\ - \nCaused by: {}", - input, e - ) - }); - ts.map(|t| t.to_string()) -} - -fn commit(input: &str) -> IResult<&str, RawCommit> { - let (input, hash_str) = line(input)?; - let (input, author_name) = line(input)?; - let (input, author_email) = line(input)?; - let (input, written_on_str) = line(input)?; - let (input, committer_name) = line(input)?; - let (input, committer_email) = line(input)?; - let (input, committed_on_str) = line(input)?; - // At one point our `git log` invocation was configured - // to return GPG key info, but that was leading to errors - // with format and GPG key validation, so we removed it - // from the print specifier - - // There is always an empty line here; ignore it - let (input, _empty_line) = line(input)?; - - let (_, hash) = hash(hash_str).map_err(|e| { - log::error!("failed to parse git commit hash [err='{}']", e); - e - })?; - - let written_on = date(written_on_str); - let committed_on = date(committed_on_str); - - let short_hash = &hash[..8]; - - if let Err(e) = &written_on { - log::error!( - "git commit has invalid written_on timestamp [commit={}, error=\"{}\"]", - short_hash, - e - ); - } - - if let Err(e) = &committed_on { - log::error!( - "git commit has invalid committed_on timestamp [commit={}, error=\"{}\"]", - short_hash, - e - ); - } - - let commit = RawCommit { - hash: hash.to_owned(), - author: Contributor { - name: 
author_name.to_owned(), - email: author_email.to_owned(), - }, - written_on, - committer: Contributor { - name: committer_name.to_owned(), - email: committer_email.to_owned(), - }, - committed_on, - }; - - Ok((input, commit)) -} - -fn commits(input: &str) -> IResult<&str, Vec> { - many0(commit)(input) -} - -fn line_ending(input: &str) -> IResult<&str, &str> { - recognize(alt(( - recognize(character('\n')), - recognize(tuple((character('\r'), character('\n')))), - )))(input) -} - -fn line(input: &str) -> IResult<&str, &str> { - terminated(not_line_ending, line_ending)(input) -} - -fn num(input: &str) -> IResult<&str, i64> { - digit1(input).map(|(input, output)| { - // Unwrap here is fine because we know it's only going to be - // a bunch of digits. Overflow is possible but we're choosing - // not to worry about it for now, because if a commit is large - // enough that the number of lines added or deleted - // in a single file overflows an i64 we have bigger problems. - (input, output.parse().unwrap()) - }) -} - -fn num_or_dash(input: &str) -> IResult<&str, Option> { - let some_num = map(num, Some); - let dash = map(character('-'), |_| None); - alt((some_num, dash))(input) -} - -fn stat(input: &str) -> IResult<&str, Option>> { - tuple((num_or_dash, space1, num_or_dash, space1, line))(input).map( - |(i, (lines_added, _, lines_deleted, _, file_name))| { - let Some(lines_added) = lines_added else { - return (i, None); - }; - - let Some(lines_deleted) = lines_deleted else { - return (i, None); - }; - - let stat = Stat { - lines_added, - lines_deleted, - file_name, - }; - - (i, Some(stat)) - }, - ) -} - -pub(crate) fn stats(input: &str) -> IResult<&str, Vec>> { - map(many0(stat), |vec| { - vec.into_iter().flatten().collect::>() - })(input) -} - -pub(crate) fn opt_rest_diff_header(input: &str) -> IResult<&str, Diff> { - opt(tuple((newline, diff)))(input).map(|(i, x)| { - if let Some((_, d)) = x { - (i, d) - } else { - ( - i, - Diff { - additions: None, - deletions: None, - 
file_diffs: vec![], - }, - ) - } - }) -} - -// Some empty commits have no output in the corresponding `git log` command, so we had to add a -// special header to be able to parse and recognize empty diffs and thus make the number of diffs -// and commits equal -pub(crate) fn diff_header(input: &str) -> IResult<&str, Diff> { - tuple((tag("~~~\n"), opt_rest_diff_header))(input).map(|(i, (_, diff))| (i, diff)) -} - -pub(crate) fn diff(input: &str) -> IResult<&str, Diff> { - log::trace!("input is {:#?}", input); - tuple((stats, line, patches))(input).map(|(i, (stats, _, patches))| { - log::trace!("patches are {:#?}", patches); - let mut additions = Some(0); - let mut deletions = Some(0); - - let file_diffs = Iterator::zip(stats.into_iter(), patches) - .map(|(stat, patch)| { - log::trace!( - "stat is {:#?} added and {:#?} deleted", - stat.lines_added, - stat.lines_deleted - ); - additions = additions.map(|a| a + stat.lines_added); - deletions = deletions.map(|d| d + stat.lines_deleted); - - FileDiff { - file_name: Arc::new(stat.file_name.to_owned()), - additions: Some(stat.lines_added), - deletions: Some(stat.lines_deleted), - patch, - } - }) - .collect::>(); - - let diff = Diff { - additions, - deletions, - file_diffs, - }; - - (i, diff) - }) -} - -fn gh_diff(input: &str) -> IResult<&str, Diff> { - // Handle reaching the end of the diff text without causing map0 to error - if input.is_empty() { - return Err(nom::Err::Error(NomError::new(input, ErrorKind::Many0))); - } - - patches_with_context(input).map(|(i, patches)| { - log::trace!("patches are {:#?}", patches); - - // GitHub diffs don't provide these. 
- let additions = None; - let deletions = None; - - let file_diffs = patches - .into_iter() - .map(|patch| FileDiff { - file_name: Arc::new(patch.file_name), - additions: None, - deletions: None, - patch: patch.content, - }) - .collect(); - log::trace!("file_diffs are {:#?}", file_diffs); - - let diff = Diff { - additions, - deletions, - file_diffs, - }; - log::trace!("diff is {:#?}", diff); - - (i, diff) - }) -} - -pub(crate) fn diffs(input: &str) -> IResult<&str, Vec> { - many0(diff_header)(input) -} - -fn gh_diffs(input: &str) -> IResult<&str, Vec> { - log::trace!("input is {}", input); - many0(gh_diff)(input) -} - -fn meta(input: &str) -> IResult<&str, &str> { - recognize(tuple((single_alpha, line)))(input) -} - -pub(crate) fn metas(input: &str) -> IResult<&str, Vec<&str>> { - many1(meta)(input) -} - -fn single_alpha(input: &str) -> IResult<&str, &str> { - recognize(one_of( - "qwertyuioplokjhgfdsazxcvbnmQWERTYUIOPLOKJHGFDSAZXCVBNM", - ))(input) -} - -fn triple_plus_minus_line(input: &str) -> IResult<&str, &str> { - recognize(tuple((alt((tag("+++"), tag("---"))), line)))(input) -} - -pub(crate) fn patch_header(input: &str) -> IResult<&str, &str> { - recognize(tuple(( - metas, - opt(triple_plus_minus_line), - opt(triple_plus_minus_line), - )))(input) -} - -fn chunk_prefix(input: &str) -> IResult<&str, &str> { - recognize(one_of("+-\\"))(input) -} - -fn line_with_ending(input: &str) -> IResult<&str, &str> { - recognize(tuple((not_line_ending, line_ending)))(input) -} - -fn chunk_line(input: &str) -> IResult<&str, &str> { - preceded(chunk_prefix, line_with_ending)(input) -} - -fn chunk_body(input: &str) -> IResult<&str, String> { - fold_many0(chunk_line, String::new, |mut patch, line| { - if line == " No newline at end of file\n" { - return patch; - } - - patch.push_str(line); - patch - })(input) -} - -fn chunk_header(input: &str) -> IResult<&str, &str> { - recognize(tuple((peek(character('@')), line)))(input) -} - -fn chunk(input: &str) -> IResult<&str, String> { 
- preceded(chunk_header, chunk_body)(input) -} - -fn chunks(input: &str) -> IResult<&str, String> { - fold_many0(chunk, String::new, |mut patch, line| { - patch.push_str(&line); - patch - })(input) -} - -fn no_newline(input: &str) -> IResult<&str, &str> { - recognize(tuple((peek(character('\\')), line)))(input) -} - -fn patch_footer(input: &str) -> IResult<&str, Option<&str>> { - opt(no_newline)(input) -} - -pub(crate) fn patch(input: &str) -> IResult<&str, String> { - tuple((patch_header, opt(chunks), patch_footer))(input) - .map(|(i, (_, chunks, _))| (i, chunks.unwrap_or_else(String::new))) -} - -fn gh_meta(input: &str) -> IResult<&str, &str> { - recognize(tuple((single_alpha, line)))(input) -} - -fn gh_metas(input: &str) -> IResult<&str, Vec<&str>> { - many1(gh_meta)(input) -} - -fn gh_patch_header(input: &str) -> IResult<&str, &str> { - recognize(tuple((gh_metas, line, line)))(input) -} - -fn chunk_line_with_context(input: &str) -> IResult<&str, &str> { - recognize(tuple((not_line_ending, line_ending)))(input).and_then(|(i, parsed)| { - if parsed.starts_with("diff --git") { - Err(nom::Err::Error(NomError::new(i, ErrorKind::Many0))) - } else { - Ok((i, parsed)) - } - }) -} - -fn chunk_body_with_context(input: &str) -> IResult<&str, String> { - fold_many0(chunk_line_with_context, String::new, |mut patch, line| { - if line.starts_with('+') || line.starts_with('-') { - // Omit the first character. 
- patch.push_str(&line[1..]); - } - - patch - })(input) -} - -fn chunk_with_context(input: &str) -> IResult<&str, String> { - preceded(chunk_header, chunk_body_with_context)(input) -} - -fn chunks_with_context(input: &str) -> IResult<&str, String> { - fold_many0(chunk_with_context, String::new, |mut patch, line| { - patch.push_str(&line); - patch - })(input) -} - -fn patch_with_context(input: &str) -> IResult<&str, GhPatch> { - tuple((gh_patch_header, chunks_with_context, patch_footer))(input).map( - |(i, (header, content, _))| { - let file_name = file_name_from_header(header); - - let gh_patch = GhPatch { file_name, content }; - - (i, gh_patch) - }, - ) -} - -fn file_name_from_header(header: &str) -> String { - let uf = ""; - - // Extract the file name from a known-valid diff header. - // - // Example: diff --git a/README.md b/README.md - header - .split_whitespace() - .nth(3) - .unwrap_or(uf) - .strip_prefix("b/") - .unwrap_or(uf) - .trim() - .into() -} - -fn patches_with_context(input: &str) -> IResult<&str, Vec> { - many0(patch_with_context)(input) -} - -fn patches(input: &str) -> IResult<&str, Vec> { - many0(patch)(input) -} - -#[derive(Debug)] -struct GhPatch { - file_name: String, - content: String, -} - -pub struct Stat<'a> { - pub lines_added: i64, - pub lines_deleted: i64, - pub file_name: &'a str, -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn parse_diff_header() { - let input = "\ -~~~\n\ -~~~\n\ -\n\ -1\t0\trequirements/test_requirements.txt\n\ -\n\ -diff --git a/requirements/test_requirements.txt b/requirements/test_requirements.txt\n\ -index 4e53f86d35..856ecf115e 100644\n\ ---- a/requirements/test_requirements.txt\n\ -+++ b/requirements/test_requirements.txt\n\ -@@ -7,0 +8 @@ pytest==7.4.0\n\ -+scipy-doctest\n"; - let (leftover, diffs) = diffs(input).unwrap(); - assert!(leftover.is_empty()); - assert_eq!(diffs.len(), 2); - assert!(diffs.get(0).unwrap().file_diffs.is_empty()); - 
assert!(!(diffs.get(1).unwrap().file_diffs.is_empty())); - } - - #[test] - fn parse_stat() { - let line = "7 0 Cargo.toml\n"; - - let (remaining, stat) = stat(line).unwrap(); - let stat = stat.unwrap(); - - assert_eq!("", remaining); - assert_eq!(7, stat.lines_added); - assert_eq!(0, stat.lines_deleted); - assert_eq!("Cargo.toml", stat.file_name); - } - - #[test] - fn parse_stats() { - let input = "\ -7 0 Cargo.toml\n\ -18 0 README.md\n\ -3 0 src/main.rs\n"; - - let (remaining, stats) = stats(input).unwrap(); - - assert_eq!("", remaining); - - assert_eq!(7, stats[0].lines_added); - assert_eq!(0, stats[0].lines_deleted); - assert_eq!("Cargo.toml", stats[0].file_name); - - assert_eq!(18, stats[1].lines_added); - assert_eq!(0, stats[1].lines_deleted); - assert_eq!("README.md", stats[1].file_name); - - assert_eq!(3, stats[2].lines_added); - assert_eq!(0, stats[2].lines_deleted); - assert_eq!("src/main.rs", stats[2].file_name); - } - - #[test] - fn parse_patch_header() { - let input = "\ -diff --git a/src/main.rs b/src/main.rs\n\ -new file mode 100644\n\ -index 0000000..e7a11a9\n\ ---- /dev/null\n\ -+++ b/src/main.rs\n"; - - let (remaining, header) = patch_header(input).unwrap(); - - assert_eq!("", remaining); - assert_eq!(input, header); - } - - #[test] - fn parse_patches() { - let input = "\ -diff --git a/src/main.rs b/src/main.rs\n\ -new file mode 100644\n\ -index 0000000..e7a11a9\n\ ---- /dev/null\n\ -+++ b/src/main.rs\n\ -@@ -0,0 +1,116 @@\n\ -+use clap::{Arg, App, SubCommand};\n\ -+use serde::{Serialize, Deserialize};\n\ -@@ -0,0 +1,116 @@\n\ -+use clap::{Arg, App, SubCommand};\n\ -+use serde::{Serialize, Deserialize};\n\ -diff --git a/src/main.rs b/src/main.rs\n\ -new file mode 100644\n\ -index 0000000..e7a11a9\n\ ---- /dev/null\n\ -+++ b/src/main.rs\n\ -@@ -0,0 +1,116 @@\n\ -+use clap::{Arg, App, SubCommand};\n\ -+use serde::{Serialize, Deserialize};\n\ -@@ -0,0 +1,116 @@\n\ -+use clap::{Arg, App, SubCommand};\n\ -+use serde::{Serialize, Deserialize};\n"; - - 
let expected_1 = "\ -use clap::{Arg, App, SubCommand};\n\ -use serde::{Serialize, Deserialize};\n\ -use clap::{Arg, App, SubCommand};\n\ -use serde::{Serialize, Deserialize};\n"; - - let expected_2 = "\ -use clap::{Arg, App, SubCommand};\n\ -use serde::{Serialize, Deserialize};\n\ -use clap::{Arg, App, SubCommand};\n\ -use serde::{Serialize, Deserialize};\n"; - - let (remaining, patches) = patches(input).unwrap(); - - assert_eq!("", remaining); - assert_eq!(expected_1, patches[0]); - assert_eq!(expected_2, patches[1]); - } - - #[test] - fn parse_patch() { - let input = "\ -diff --git a/src/main.rs b/src/main.rs\n\ -new file mode 100644\n\ -index 0000000..e7a11a9\n\ ---- /dev/null\n\ -+++ b/src/main.rs\n\ -@@ -0,0 +1,116 @@\n\ -+use clap::{Arg, App, SubCommand};\n\ -+use serde::{Serialize, Deserialize};\n\ -@@ -0,0 +1,116 @@\n\ -+use clap::{Arg, App, SubCommand};\n\ -+use serde::{Serialize, Deserialize};\n"; - - let expected = "\ -use clap::{Arg, App, SubCommand};\n\ -use serde::{Serialize, Deserialize};\n\ -use clap::{Arg, App, SubCommand};\n\ -use serde::{Serialize, Deserialize};\n"; - - let (remaining, patch) = patch(input).unwrap(); - - assert_eq!("", remaining); - assert_eq!(expected, patch); - } - - #[test] - fn parse_chunks() { - let input = "\ -@@ -0,0 +1,116 @@\n\ -+use clap::{Arg, App, SubCommand};\n\ -+use serde::{Serialize, Deserialize};\n\ -@@ -0,0 +1,116 @@\n\ -+use clap::{Arg, App, SubCommand};\n\ -+use serde::{Serialize, Deserialize};\n"; - - let expected = "\ -use clap::{Arg, App, SubCommand};\n\ -use serde::{Serialize, Deserialize};\n\ -use clap::{Arg, App, SubCommand};\n\ -use serde::{Serialize, Deserialize};\n"; - - let (remaining, patch) = chunks(input).unwrap(); - - assert_eq!("", remaining); - assert_eq!(expected, patch); - } - - #[test] - fn parse_chunk() { - let input = "\ -@@ -0,0 +1,116 @@\n\ -+use clap::{Arg, App, SubCommand};\n\ -+use serde::{Serialize, Deserialize};\n"; - - let expected = "\ -use clap::{Arg, App, SubCommand};\n\ -use 
serde::{Serialize, Deserialize};\n"; - - let (remaining, patch) = chunk(input).unwrap(); - - assert_eq!("", remaining); - assert_eq!(expected, patch); - } - - #[test] - fn parse_chunk_header() { - let input = "@@ -0,0 +1,116 @@\n"; - let expected = "@@ -0,0 +1,116 @@\n"; - - let (remaining, header) = chunk_header(input).unwrap(); - - assert_eq!("", remaining); - assert_eq!(expected, header); - } - - #[test] - fn parse_chunk_body() { - let input = "\ -+use clap::{Arg, App, SubCommand};\n\ -+use serde::{Serialize, Deserialize};\n"; - - let expected = "\ -use clap::{Arg, App, SubCommand};\n\ -use serde::{Serialize, Deserialize};\n"; - - let (remaining, body) = chunk_body(input).unwrap(); - - assert_eq!("", remaining); - assert_eq!(expected, body); - } - - #[test] - fn parse_chunk_line() { - let input = "+use clap::{Arg, App, SubCommand};\n"; - let expected = "use clap::{Arg, App, SubCommand};\n"; - - let (remaining, line) = chunk_line(input).unwrap(); - - assert_eq!("", remaining); - assert_eq!(expected, line); - } - - #[test] - fn parse_plus_or_minus() { - let input_plus = "+"; - let expected_plus = "+"; - - let (remaining, c) = chunk_prefix(input_plus).unwrap(); - assert_eq!("", remaining); - assert_eq!(expected_plus, c); - - let input_minus = "-"; - let expected_minus = "-"; - let (remaining, c) = chunk_prefix(input_minus).unwrap(); - assert_eq!("", remaining); - assert_eq!(expected_minus, c); - } - - #[test] - fn parse_line_with_ending() { - let input = "use clap::{Arg, App, SubCommand};\n"; - let expected = "use clap::{Arg, App, SubCommand};\n"; - - let (remaining, line) = line_with_ending(input).unwrap(); - assert_eq!("", remaining); - assert_eq!(expected, line); - } - - #[test] - fn parse_diff() { - let input = r#"10 0 .gitignore -4 0 Cargo.toml -127 1 src/main.rs - -diff --git a/.gitignore b/.gitignore -new file mode 100644 -index 0000000..50c8301 ---- /dev/null -+++ b/.gitignore -@@ -0,0 +1,10 @@ -+# Generated by Cargo -+# will have compiled files and 
executables -+/target/ -+ -+# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries -+# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html -+Cargo.lock -+ -+# These are backup files generated by rustfmt -+**/*.rs.bk -\ No newline at end of file -diff --git a/Cargo.toml b/Cargo.toml -index 191135b..d91dabb 100644 ---- a/Cargo.toml -+++ b/Cargo.toml -@@ -7,0 +8,4 @@ edition = "2018" -+clap = "2.33.0" -+petgraph = "0.4.13" -+serde = { version = "1.0.91", features = ["derive"] } -+serde_json = "1.0.39" -\ No newline at end of file -diff --git a/src/main.rs b/src/main.rs -index e7a11a9..4894a2e 100644 ---- a/src/main.rs -+++ b/src/main.rs -@@ -0,0 +1,116 @@ -+use clap::{Arg, App, SubCommand}; -+use serde::{Serialize, Deserialize}; -+// use petgraph::{Graph, Directed}; -+// use std::collections::Vec; -+use std::process::Command; -+use std::str; -+ -+// 1. Check that you're in a Git repo. -+// * If not, error out. -+// 2. Run a command to get the Git log data. -+// 3. Deserialize that data with Serde, into a GitLog data structure. -+// 4. Convert the GitLog data structure into a CommitGraph. -+// 5. Run analyses on the CommitGraph. 
-+ -+/* -+struct CommitGraph { -+ graph: Graph, -+} -+ -+struct AnalysisReport { -+ records: Vec, -+} -+ -+trait Analysis { -+ fn analyze(commit_graph: &CommitGraph) -> AnalysisReport; -+} -+*/ -+ -+#[derive(Deserialize, Debug)] -+struct GitContributor { -+ name: String, -+ email: String, -+ date: String, -+} -+ -+#[derive(Deserialize, Debug)] -+struct GitCommit { -+ commit: String, -+ abbreviated_commit: String, -+ tree: String, -+ abbreviated_tree: String, -+ parent: String, -+ abbreviated_parent: String, -+ refs: String, -+ encoding: String, -+ subject: String, -+ sanitized_subject_line: String, -+ body: String, -+ commit_notes: String, -+ verification_flag: String, -+ signer: String, -+ signer_key: String, -+ author: GitContributor, -+ committer: GitContributor, -+} -+ -+#[derive(Deserialize, Debug)] -+struct GitLog { -+ commits: Vec, -+} -+ -+fn strip_characters(original: &str, to_strip: &str) -> String { -+ original.chars().filter(|&c| !to_strip.contains(c)).collect() -+} -+ -+fn get_git_log() -> String { -+ // The format string being passed to Git, to get commit data. -+ // Note that this matches the GitLog struct above. 
-+ let format = " \ -+ --pretty=format: \ -+ { %n \ -+ \"commit\": \"%H\", %n \ -+ \"abbreviated_commit\": \"%h\", %n \ -+ \"tree\": \"%T\", %n \ -+ \"abbreviated_tree\": \"%t\", %n \ -+ \"parent\": \"%P\", %n \ -+ \"abbreviated_parent\": \"%p\", %n \ -+ \"refs\": \"%D\", %n \ -+ \"encoding\": \"%e\", %n \ -+ \"subject\": \"%s\", %n \ -+ \"sanitized_subject_line\": \"%f\", %n \ -+ \"body\": \"%b\", %n \ -+ \"commit_notes\": \"%N\", %n \ -+ \"verification_flag\": \"%G?\", %n \ -+ \"signer\": \"%GS\", %n \ -+ \"signer_key\": \"%GK\", %n \ -+ \"author\": { %n \ -+ \"name\": \"%aN\", %n \ -+ \"email\": \"%aE\", %n \ -+ \"date\": \"%aD\" %n \ -+ }, %n \ -+ \"commiter\": { %n \ -+ \"name\": \"%cN\", %n \ -+ \"email\": \"%cE\", %n \ -+ \"date\": \"%cD\" %n \ -+ } %n \ -+ },"; -+ let format = strip_characters(format, " "); -+ -+ // Run the git command and extract the stdout as a string, stripping the trailing comma. -+ let output = Command::new("git") -+ .args(&["log", &format]) -+ .output() -+ .expect("failed to execute process"); -+ let output = str::from_utf8(&output.stdout).unwrap().to_string(); -+ let output = (&output[0..output.len() - 2]).to_string(); // Remove trailing comma. -+ -+ // Wrap the result in brackets. 
-+ let mut result = String::new(); -+ result.push('['); -+ result.push_str(&output); -+ result.push(']'); -+ -+ result -+} -+ -@@ -2 +118,11 @@ fn main() { -- println!("Hello, world!"); -+ let matches = App::new("hipcheck") -+ .version("0.1") -+ .author("Andrew Lilley Brinker ") -+ .about("Check Git history for concerning patterns") -+ .get_matches(); -+ -+ let log_string = get_git_log(); -+ -+ let gl: GitLog = serde_json::from_str(&log_string).unwrap(); -+ -+ println!("{:?}", gl); -"#; - - let expected = Diff { - additions: Some(141), - deletions: Some(1), - file_diffs: vec![ - FileDiff { - file_name: Arc::new(String::from(".gitignore")), - additions: Some(10), - deletions: Some(0), - patch: String::from( - r#"# Generated by Cargo -# will have compiled files and executables -/target/ - -# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries -# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html -Cargo.lock - -# These are backup files generated by rustfmt -**/*.rs.bk -"#, - ), - }, - FileDiff { - file_name: Arc::new(String::from("Cargo.toml")), - additions: Some(4), - deletions: Some(0), - patch: String::from( - r#"clap = "2.33.0" -petgraph = "0.4.13" -serde = { version = "1.0.91", features = ["derive"] } -serde_json = "1.0.39" -"#, - ), - }, - FileDiff { - file_name: Arc::new(String::from("src/main.rs")), - additions: Some(127), - deletions: Some(1), - patch: String::from( - r#"use clap::{Arg, App, SubCommand}; -use serde::{Serialize, Deserialize}; -// use petgraph::{Graph, Directed}; -// use std::collections::Vec; -use std::process::Command; -use std::str; - -// 1. Check that you're in a Git repo. -// * If not, error out. -// 2. Run a command to get the Git log data. -// 3. Deserialize that data with Serde, into a GitLog data structure. -// 4. Convert the GitLog data structure into a CommitGraph. -// 5. Run analyses on the CommitGraph. 
- -/* -struct CommitGraph { - graph: Graph, -} - -struct AnalysisReport { - records: Vec, -} - -trait Analysis { - fn analyze(commit_graph: &CommitGraph) -> AnalysisReport; -} -*/ - -#[derive(Deserialize, Debug)] -struct GitContributor { - name: String, - email: String, - date: String, -} - -#[derive(Deserialize, Debug)] -struct GitCommit { - commit: String, - abbreviated_commit: String, - tree: String, - abbreviated_tree: String, - parent: String, - abbreviated_parent: String, - refs: String, - encoding: String, - subject: String, - sanitized_subject_line: String, - body: String, - commit_notes: String, - verification_flag: String, - signer: String, - signer_key: String, - author: GitContributor, - committer: GitContributor, -} - -#[derive(Deserialize, Debug)] -struct GitLog { - commits: Vec, -} - -fn strip_characters(original: &str, to_strip: &str) -> String { - original.chars().filter(|&c| !to_strip.contains(c)).collect() -} - -fn get_git_log() -> String { - // The format string being passed to Git, to get commit data. - // Note that this matches the GitLog struct above. - let format = " \ - --pretty=format: \ - { %n \ - \"commit\": \"%H\", %n \ - \"abbreviated_commit\": \"%h\", %n \ - \"tree\": \"%T\", %n \ - \"abbreviated_tree\": \"%t\", %n \ - \"parent\": \"%P\", %n \ - \"abbreviated_parent\": \"%p\", %n \ - \"refs\": \"%D\", %n \ - \"encoding\": \"%e\", %n \ - \"subject\": \"%s\", %n \ - \"sanitized_subject_line\": \"%f\", %n \ - \"body\": \"%b\", %n \ - \"commit_notes\": \"%N\", %n \ - \"verification_flag\": \"%G?\", %n \ - \"signer\": \"%GS\", %n \ - \"signer_key\": \"%GK\", %n \ - \"author\": { %n \ - \"name\": \"%aN\", %n \ - \"email\": \"%aE\", %n \ - \"date\": \"%aD\" %n \ - }, %n \ - \"commiter\": { %n \ - \"name\": \"%cN\", %n \ - \"email\": \"%cE\", %n \ - \"date\": \"%cD\" %n \ - } %n \ - },"; - let format = strip_characters(format, " "); - - // Run the git command and extract the stdout as a string, stripping the trailing comma. 
- let output = Command::new("git") - .args(&["log", &format]) - .output() - .expect("failed to execute process"); - let output = str::from_utf8(&output.stdout).unwrap().to_string(); - let output = (&output[0..output.len() - 2]).to_string(); // Remove trailing comma. - - // Wrap the result in brackets. - let mut result = String::new(); - result.push('['); - result.push_str(&output); - result.push(']'); - - result -} - - println!("Hello, world!"); - let matches = App::new("hipcheck") - .version("0.1") - .author("Andrew Lilley Brinker ") - .about("Check Git history for concerning patterns") - .get_matches(); - - let log_string = get_git_log(); - - let gl: GitLog = serde_json::from_str(&log_string).unwrap(); - - println!("{:?}", gl); -"#, - ), - }, - ], - }; - - let (remaining, diff) = diff(input).unwrap(); - - assert_eq!("", remaining); - assert_eq!(expected, diff); - } - - #[test] - fn parse_patch_with_context() { - let input = r#"diff --git a/README.md b/README.md -index 20b42ecfdf..b0f30e8e35 100644 ---- a/README.md -+++ b/README.md -@@ -432,24 +432,31 @@ Other Style Guides - }); - - // bad -- inbox.filter((msg) => { -- const { subject, author } = msg; -- if (subject === 'Mockingbird') { -- return author === 'Harper Lee'; -- } else { -- return false; -- } -- }); -+ var indexMap = myArray.reduce(function(memo, item, index) { -+ memo[item] = index; -+ }, {}); - -- // good -- inbox.filter((msg) => { -- const { subject, author } = msg; -- if (subject === 'Mockingbird') { -- return author === 'Harper Lee'; -- } - -- return false; -+ // good -+ var indexMap = myArray.reduce(function(memo, item, index) { -+ memo[item] = index; -+ return memo; -+ }, {}); -+ -+ -+ // bad -+ const alpha = people.sort((lastOne, nextOne) => { -+ const [aLast, aFirst] = lastOne.split(', '); -+ const [bLast, bFirst] = nextOne.split(', '); - }); -+ -+ // good -+ const alpha = people.sort((lastOne, nextOne) => { -+ const [aLast, aFirst] = lastOne.split(', '); -+ const [bLast, bFirst] = 
nextOne.split(', '); -+ return aLast > bLast ? 1 : -1; -+ }); -+ - ``` - - -"#; - - let expected = r#" inbox.filter((msg) => { - const { subject, author } = msg; - if (subject === 'Mockingbird') { - return author === 'Harper Lee'; - } else { - return false; - } - }); - var indexMap = myArray.reduce(function(memo, item, index) { - memo[item] = index; - }, {}); - // good - inbox.filter((msg) => { - const { subject, author } = msg; - if (subject === 'Mockingbird') { - return author === 'Harper Lee'; - } - return false; - // good - var indexMap = myArray.reduce(function(memo, item, index) { - memo[item] = index; - return memo; - }, {}); - - - // bad - const alpha = people.sort((lastOne, nextOne) => { - const [aLast, aFirst] = lastOne.split(', '); - const [bLast, bFirst] = nextOne.split(', '); - - // good - const alpha = people.sort((lastOne, nextOne) => { - const [aLast, aFirst] = lastOne.split(', '); - const [bLast, bFirst] = nextOne.split(', '); - return aLast > bLast ? 1 : -1; - }); - -"#; - - let (remaining, patch) = patch_with_context(input).unwrap(); - - assert_eq!( - "", remaining, - "expected nothing remaining, got '{}'", - remaining - ); - assert_eq!(expected, patch.content); - } - - #[test] - fn parse_gh_diff() { - let input = r#"diff --git a/README.md b/README.md -index 20b42ecfdf..b0f30e8e35 100644 ---- a/README.md -+++ b/README.md -@@ -432,24 +432,31 @@ Other Style Guides - }); - - // bad -- inbox.filter((msg) => { -- const { subject, author } = msg; -- if (subject === 'Mockingbird') { -- return author === 'Harper Lee'; -- } else { -- return false; -- } -- }); -+ var indexMap = myArray.reduce(function(memo, item, index) { -+ memo[item] = index; -+ }, {}); - -- // good -- inbox.filter((msg) => { -- const { subject, author } = msg; -- if (subject === 'Mockingbird') { -- return author === 'Harper Lee'; -- } - -- return false; -+ // good -+ var indexMap = myArray.reduce(function(memo, item, index) { -+ memo[item] = index; -+ return memo; -+ }, {}); -+ -+ -+ // 
bad -+ const alpha = people.sort((lastOne, nextOne) => { -+ const [aLast, aFirst] = lastOne.split(', '); -+ const [bLast, bFirst] = nextOne.split(', '); - }); -+ -+ // good -+ const alpha = people.sort((lastOne, nextOne) => { -+ const [aLast, aFirst] = lastOne.split(', '); -+ const [bLast, bFirst] = nextOne.split(', '); -+ return aLast > bLast ? 1 : -1; -+ }); -+ - ``` - - -"#; - - let expected = r#" inbox.filter((msg) => { - const { subject, author } = msg; - if (subject === 'Mockingbird') { - return author === 'Harper Lee'; - } else { - return false; - } - }); - var indexMap = myArray.reduce(function(memo, item, index) { - memo[item] = index; - }, {}); - // good - inbox.filter((msg) => { - const { subject, author } = msg; - if (subject === 'Mockingbird') { - return author === 'Harper Lee'; - } - return false; - // good - var indexMap = myArray.reduce(function(memo, item, index) { - memo[item] = index; - return memo; - }, {}); - - - // bad - const alpha = people.sort((lastOne, nextOne) => { - const [aLast, aFirst] = lastOne.split(', '); - const [bLast, bFirst] = nextOne.split(', '); - - // good - const alpha = people.sort((lastOne, nextOne) => { - const [aLast, aFirst] = lastOne.split(', '); - const [bLast, bFirst] = nextOne.split(', '); - return aLast > bLast ? 
1 : -1; - }); - -"#; - - let (remaining, diff) = gh_diff(input).unwrap(); - - assert_eq!( - "", remaining, - "expected nothing remaining, got '{}'", - remaining - ); - - assert_eq!(None, diff.additions); - assert_eq!(None, diff.deletions); - - assert_eq!("README.md", diff.file_diffs[0].file_name.as_ref()); - assert_eq!(None, diff.file_diffs[0].additions); - assert_eq!(None, diff.file_diffs[0].deletions); - assert_eq!(expected, diff.file_diffs[0].patch) - } -} diff --git a/plugins/git/src/util/command.rs b/plugins/git/src/util/command.rs deleted file mode 100644 index bd289bed..00000000 --- a/plugins/git/src/util/command.rs +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -use std::{convert::AsRef, env, ffi::OsStr, iter::IntoIterator}; - -/// Print command line args as well as commands and args for git commands -pub fn log_git_args(repo_path: &str, args: I, git_path: &str) -where - I: IntoIterator + Copy, - S: AsRef, -{ - log::debug!("logging git CLI args"); - - for arg in env::args() { - log::debug!("git CLI environment arg [arg='{}']", arg); - } - - log::debug!("git CLI executable location [path='{}']", git_path); - - log::debug!("git CLI repository location [path='{}']", repo_path); - - log_each_git_arg(args); - - log::debug!("done logging git CLI args"); -} - -pub fn log_each_git_arg(args: I) -where - I: IntoIterator, - S: AsRef, -{ - for (index, val) in args.into_iter().enumerate() { - let arg_val = val - .as_ref() - .to_str() - .unwrap_or("argument for command could not be logged."); - - log::debug!("git CLI argument [name='{}', value='{}']", index, arg_val); - } -} diff --git a/plugins/git/src/util/git_command.rs b/plugins/git/src/util/git_command.rs index 1bbecec1..ee05f37d 100644 --- a/plugins/git/src/util/git_command.rs +++ b/plugins/git/src/util/git_command.rs @@ -1,129 +1,173 @@ // SPDX-License-Identifier: Apache-2.0 use crate::data::*; -use crate::parse::*; -use crate::util::command::log_git_args; - -use anyhow::{anyhow, Context as 
_, Result}; -use std::{ - convert::AsRef, ffi::OsStr, iter::IntoIterator, ops::Not as _, path::Path, process::Command, -}; - -#[derive(Debug)] -pub struct GitCommand { - command: Command, +use anyhow::{Context, Result}; +use git2::DiffFormat; +use git2::DiffLineType; +use git2::Repository; +use git2::Revwalk; +use git2::Sort; +use jiff::Timestamp; +use std::str::FromStr; +use std::{convert::AsRef, path::Path}; + +fn get_repo_head<'a>(repo: &'a Repository) -> Result> { + let mut revwalk = repo.revwalk().context("Unable to determine HEAD")?; + // as of right now, we always want the commits sorted from newest to oldest + revwalk + .set_sorting(Sort::TIME) + .context("Unable to set commit sorting")?; + revwalk.push_head().context("Unable to push repo HEAD")?; + Ok(revwalk) } -impl GitCommand { - pub fn for_repo(repo_path: &Path, args: I) -> Result - where - I: IntoIterator + Copy, - S: AsRef, - { - GitCommand::internal(Some(repo_path), args) +/// Function to call on every commit in the repo to accumulate some type for each commit in the repo +type MapFn<'a, T> = &'a dyn Fn(&Repository, git2::Commit<'_>) -> Result; +/// Function that will break out of the git tree walking process, if this returns Ok(true) +type BreakNowFn<'a> = &'a dyn Fn(&git2::Commit<'_>) -> bool; + +/// Utility function for walking all of the commits in a git repo and running a function on each commit to generate some result and breaking out of the walk if `break_now` is true +fn walk_commits(repo: P, func: MapFn, break_now: Option) -> Result> +where + P: AsRef, +{ + let repo = Repository::open(repo).context("Could not open repository")?; + let revwalk = get_repo_head(&repo)?; + // since we are walking commit by commit, 5,000 was arbitrarily chosen to reduce allocations for small/medium repo sizes + let mut results = Vec::with_capacity(5_000); + for oid in revwalk { + let oid = oid?; + let commit = repo.find_commit(oid)?; + if let Some(ref break_now) = break_now { + if break_now(&commit) { + break; + 
} + } + let res = func(&repo, commit)?; + results.push(res); } + Ok(results) +} - fn internal(repo_path: Option<&Path>, args: I) -> Result - where - I: IntoIterator + Copy, - S: AsRef, - { - // Init the command. - let git_path = which::which("git").context("can't find git command")?; - let no_repo_found = Path::new("no_repo_found"); - let repo = repo_path.unwrap_or(no_repo_found).display().to_string(); - let path = git_path.display().to_string(); - log_git_args(&repo, args, &path); - let mut command = Command::new(&git_path); - command.args(args); - - // Set the path if necessary - if let Some(repo_path) = repo_path { - command.current_dir(repo_path); - } +fn get_raw_commit(_repo: &Repository, commit: git2::Commit) -> Result { + Ok(RawCommit::from(commit)) +} - if cfg!(windows) { - // this method is broken on Windows. See: https://github.com/rust-lang/rust/issues/31259 - //command.env_clear() - } else { - command.env_clear(); - }; +/// retrieve all of the raw commits in a repos history +pub fn get_raw_commits>(repo: P) -> Result> { + walk_commits(repo, &get_raw_commit, None) +} - Ok(GitCommand { command }) - } +fn get_commit(_repo: &Repository, commit: git2::Commit) -> Result { + Ok(Commit::from(commit)) +} - pub fn output(&mut self) -> Result { - let output = self.command.output()?; +/// retrieve all of the commits in a repos history +pub fn get_commits>(repo: P) -> Result> { + walk_commits(repo, &get_commit, None) +} - if output.status.success() { - let output_text = String::from_utf8_lossy(&output.stdout).to_string(); - return Ok(output_text); - } +/// retrieve all of the commits in a repos history that were committed after a specific time +pub fn get_commits_from_date

(repo: P, since: Timestamp) -> Result> +where + P: AsRef, +{ + walk_commits( + repo, + &get_commit, + Some(&|commit| { + let raw_commit = RawCommit::from(commit.clone()); + if let Ok(commit_timestamp) = raw_commit.committed_on { + if let Ok(commit_timestamp) = jiff::Timestamp::from_str(&commit_timestamp) { + return commit_timestamp < since; + } + } + false + }), + ) +} - match String::from_utf8(output.stderr) { - Ok(msg) if msg.is_empty().not() => { - Err(anyhow!("(from git) {} [{}]", msg.trim(), output.status)) +/// from a given commit in a repo, attempt to generate the Diff between this commit and its parent +fn get_diff_raw(repo: &Repository, commit: git2::Commit) -> Result { + let current_tree = commit + .tree() + .context("Could not determine tree for the current commit")?; + + // if there is no previous commit, then this must be the first commit + let previous_tree = match commit.parents().next() { + Some(previous_commit) => Some( + previous_commit + .tree() + .context("Could not determine tree for the previous commit")?, + ), + None => None, + }; + + let diff = repo + .diff_tree_to_tree(previous_tree.as_ref(), Some(¤t_tree), None) + .context("Could not diff current commit to previous commit")?; + + let stats = diff.stats().context("Could not determine stats for diff")?; + + let total_insertions_in_commit = stats.insertions(); + let total_deletions_in_commit = stats.deletions(); + + // arbitrary pre-allocation to hold FileDiffs for this commit to reduce number of needed allocations + let mut file_diffs: Vec = Vec::with_capacity(128); + // iterate over all of the patches in this commit to generate all of the FileDiffs for this commit + diff.print(DiffFormat::Patch, |delta, _hunk, line| { + if let Some(file_name) = delta.new_file().path() { + let file_name = file_name.to_string_lossy(); + + let file_diff: &mut FileDiff = match file_diffs + .iter_mut() + .find(|fd| fd.file_name.as_str() == file_name) + { + Some(file_diff) => file_diff, + None => { + 
file_diffs.push(FileDiff { + file_name: file_name.to_string().into(), + additions: 0, + deletions: 0, + patch: String::new(), + }); + // unwrap is safe because we just pushed + file_diffs.last_mut().unwrap() + } + }; + + // add the line to the patch + file_diff.add_to_patch(line.content()); + + match line.origin_value() { + DiffLineType::Addition => file_diff.increment_additions(1), + DiffLineType::Deletion => file_diff.increment_deletions(1), + _ => {} } - _ => Err(anyhow!("git failed [{}]", output.status)), } - } + true + }) + .context("Could not generate FileDiff for commit")?; + + Ok(Diff { + additions: Some(total_insertions_in_commit as i64), + deletions: Some(total_deletions_in_commit as i64), + file_diffs, + }) } -pub fn get_commits(repo: &str) -> Result> { - let path = Path::new(repo); - let raw_output = GitCommand::for_repo( - path, - [ - "--no-pager", - "log", - "--no-merges", - "--date=iso-strict", - "--pretty=tformat:%H%n%aN%n%aE%n%ad%n%cN%n%cE%n%cd%n", - ], - )? - .output() - .context("git log command failed")?; - - git_log(&raw_output) +/// get the diff between each commit and its parent in the repo +pub fn get_diffs>(repo: P) -> Result> { + walk_commits(repo, &get_diff_raw, None) } -pub fn get_commits_from_date(repo: &str, date: &str) -> Result> { - let path = Path::new(repo); - let since_date = format!("--since='{} month ago'", date); - let msg = format!("git log from date {} command failed", &date); - let raw_output = GitCommand::for_repo( - path, - [ - "--no-pager", - "log", - "--no-merges", - "--date=iso-strict", - "--pretty=tformat:%H%n%aN%n%aE%n%ad%n%cN%n%cE%n%cd%n%GS%n%GK%n", - "--all", - &since_date, - ], - )? 
- .output() - .context(msg)?; - - git_log(&raw_output) +fn get_commit_diff(repo: &Repository, commit: git2::Commit) -> Result { + let hc_commit = Commit::from(commit.clone()); + let diff = get_diff_raw(repo, commit)?; + Ok(CommitDiff::new(hc_commit, diff)) } -pub fn get_diffs(repo: &str) -> Result> { - let path = Path::new(repo); - let output = GitCommand::for_repo( - path, - [ - "--no-pager", - "log", - "--no-merges", - "--numstat", - "--pretty=tformat:~~~", - "-U0", - ], - )? - .output() - .context("git diff command failed")?; - - git_diff(&output) +/// gets all of the commits and diffs in the repo +pub fn get_commit_diffs>(repo: P) -> Result> { + walk_commits(repo, &get_commit_diff, None) } diff --git a/plugins/git/src/util/mod.rs b/plugins/git/src/util/mod.rs index ec532257..d0980d7e 100644 --- a/plugins/git/src/util/mod.rs +++ b/plugins/git/src/util/mod.rs @@ -1,4 +1,3 @@ // SPDX-License-Identifier: Apache-2.0 -pub mod command; pub mod git_command;