From 684408646b22e1e2daeb45daecc6f8bc27ec57c6 Mon Sep 17 00:00:00 2001 From: Gregor MacDougall Date: Fri, 1 Dec 2023 08:29:01 -0500 Subject: [PATCH 1/5] Switch directory ownership to `git ls-files` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This switches away from using a glob match on '**' within `DirectoryOwnership` matcher in favour of using `git ls-files`. This provides a significant speed improvement for large repositories for a few reasons. The primary reason is that we're only searching through tracked files and avoiding potentially large directores, such as `node_modules`. The major intended side effect of this is that it only is dealing with tracked files rather than all files. I'm opening the PR for further discussion and refinement to see if this is worth considering. Speed comparison for me locally: Before: ``` ❯ time bin/codeownership validate ________________________________________________________ Executed in 16.72 secs fish external usr time 3.08 secs 0.10 millis 3.08 secs sys time 10.70 secs 1.59 millis 10.70 secs ``` After: ``` ❯ time bin/codeownership validate ________________________________________________________ Executed in 10.67 secs fish external usr time 2.90 secs 111.00 micros 2.90 secs sys time 8.65 secs 741.00 micros 8.65 secs ``` --- .../private/ownership_mappers/directory_ownership.rb | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/code_ownership/private/ownership_mappers/directory_ownership.rb b/lib/code_ownership/private/ownership_mappers/directory_ownership.rb index e481bfb..a333e3f 100644 --- a/lib/code_ownership/private/ownership_mappers/directory_ownership.rb +++ b/lib/code_ownership/private/ownership_mappers/directory_ownership.rb @@ -41,12 +41,10 @@ def update_cache(cache, files) returns(T::Hash[String, ::CodeTeams::Team]) end def globs_to_owner(files) - # The T.unsafe is because the upstream RBI is wrong for Pathname.glob - T - .unsafe(Pathname) - .glob(File.join('**/', CODEOWNERS_DIRECTORY_FILE_NAME)) - .map(&:cleanpath) - .each_with_object({}) do |pathname, res| + `git ls-files **/#{CODEOWNERS_DIRECTORY_FILE_NAME}` + .each_line + .each_with_object({}) do |fname, res| + pathname = Pathname.new(fname.strip).cleanpath owner = owner_for_codeowners_file(pathname) res[pathname.dirname.cleanpath.join('**/**').to_s] = owner end From 3dca00a948b8639e3c5644ed74aa37f0d31bcdf4 Mon Sep 17 00:00:00 2001 From: Gregor MacDougall Date: Sun, 23 Jun 2024 23:32:03 -0400 Subject: [PATCH 2/5] Add configuration for use_git_ls_files This allows us to decide which algorithm to use for finding code ownership files within the repo. If the configuration directive is set to use_git_ls_files then we will use that rather than finding files via a glob. This will lead to a significant speed improvement as finding files via glob is slow. --- lib/code_ownership/configuration.rb | 4 +++- .../ownership_mappers/directory_ownership.rb | 22 +++++++++++++------ spec/lib/code_ownership_spec.rb | 17 ++++++++++++++ 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/lib/code_ownership/configuration.rb b/lib/code_ownership/configuration.rb index 295091c..a410f2b 100644 --- a/lib/code_ownership/configuration.rb +++ b/lib/code_ownership/configuration.rb @@ -12,6 +12,7 @@ class Configuration < T::Struct const :skip_codeowners_validation, T::Boolean const :raw_hash, T::Hash[T.untyped, T.untyped] const :require_github_teams, T::Boolean + const :use_git_ls_files, T::Boolean sig { returns(Configuration) } def self.fetch @@ -29,7 +30,8 @@ def self.fetch js_package_paths: js_package_paths(config_hash), skip_codeowners_validation: config_hash.fetch('skip_codeowners_validation', false), raw_hash: config_hash, - require_github_teams: config_hash.fetch('require_github_teams', false) + require_github_teams: config_hash.fetch('require_github_teams', false), + use_git_ls_files: config_hash.fetch('use_git_ls_files', false) ) end diff --git a/lib/code_ownership/private/ownership_mappers/directory_ownership.rb b/lib/code_ownership/private/ownership_mappers/directory_ownership.rb index a333e3f..9661ae6 100644 --- a/lib/code_ownership/private/ownership_mappers/directory_ownership.rb +++ b/lib/code_ownership/private/ownership_mappers/directory_ownership.rb @@ -37,14 +37,22 @@ def update_cache(cache, files) # subset of files, but rather we want code ownership for all files. # sig do - override.params(files: T::Array[String]). - returns(T::Hash[String, ::CodeTeams::Team]) + override.params(_files: T::Array[String]) + .returns(T::Hash[String, ::CodeTeams::Team]) end - def globs_to_owner(files) - `git ls-files **/#{CODEOWNERS_DIRECTORY_FILE_NAME}` - .each_line - .each_with_object({}) do |fname, res| - pathname = Pathname.new(fname.strip).cleanpath + def globs_to_owner(_files) + file_list = + if CodeOwnership.configuration.use_git_ls_files + `git ls-files **/#{CODEOWNERS_DIRECTORY_FILE_NAME}` + .each_line + .map { Pathname.new(_1.strip).cleanpath } + else + T + .unsafe(Pathname) + .glob(File.join('**/', CODEOWNERS_DIRECTORY_FILE_NAME)) + .map(&:cleanpath) + end + file_list.each_with_object({}) do |pathname, res| owner = owner_for_codeowners_file(pathname) res[pathname.dirname.cleanpath.join('**/**').to_s] = owner end diff --git a/spec/lib/code_ownership_spec.rb b/spec/lib/code_ownership_spec.rb index d053895..81db055 100644 --- a/spec/lib/code_ownership_spec.rb +++ b/spec/lib/code_ownership_spec.rb @@ -23,6 +23,23 @@ expect { CodeOwnership.validate!(files: ['app/services/test/some_unowned_file.rb']) }.to_not raise_error expect { CodeOwnership.validate!(files: ['app/services/[test]/some_unowned_file.rb']) }.to_not raise_error end + + context 'when the the configuration for use_git_ls_files is enabled' do + before do + allow(CodeOwnership.configuration).to receive(:use_git_ls_files) { true } + allow_any_instance_of(Object).to receive(:`) do + Pathname + .glob(File.join('**/', '.codeowner')) + .map(&:to_s) + .join("\n") + end + end + + it 'has no validation errors' do + expect { CodeOwnership.validate!(files: ['app/services/test/some_unowned_file.rb']) }.to_not raise_error + expect { CodeOwnership.validate!(files: ['app/services/[test]/some_unowned_file.rb']) }.to_not raise_error + end + end end context 'file ownership with [] characters' do From 04eea8f5a2649054640fe65d18300d23b9601cd0 Mon Sep 17 00:00:00 2001 From: Gregor MacDougall Date: Sun, 23 Jun 2024 23:45:26 -0400 Subject: [PATCH 3/5] Add configuration option for git ls-files This allows us to configure a tool to use git ls-files over using the standard file glob matching. --- lib/code_ownership/cli.rb | 5 +++++ lib/code_ownership/configuration.rb | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/code_ownership/cli.rb b/lib/code_ownership/cli.rb index c391b96..ab3e6aa 100644 --- a/lib/code_ownership/cli.rb +++ b/lib/code_ownership/cli.rb @@ -48,6 +48,11 @@ def self.validate!(argv) options[:skip_stage] = true end + opts.on('-use-git-ls-files', + 'Use git ls-files for findinging .codeowner files instead of globs. Will be faster if in a git context.') do + CodeOwnership::Configuration.use_git_ls_files = true + end + opts.on('--help', 'Shows this prompt') do puts opts exit diff --git a/lib/code_ownership/configuration.rb b/lib/code_ownership/configuration.rb index a410f2b..4129826 100644 --- a/lib/code_ownership/configuration.rb +++ b/lib/code_ownership/configuration.rb @@ -12,7 +12,7 @@ class Configuration < T::Struct const :skip_codeowners_validation, T::Boolean const :raw_hash, T::Hash[T.untyped, T.untyped] const :require_github_teams, T::Boolean - const :use_git_ls_files, T::Boolean + prop :use_git_ls_files, T::Boolean sig { returns(Configuration) } def self.fetch From cc2abec7ea441677b4a22fb78d21bfe5034ac268 Mon Sep 17 00:00:00 2001 From: Gregor MacDougall Date: Tue, 25 Jun 2024 14:34:08 -0400 Subject: [PATCH 4/5] Proper number of dashes in command argument This switches `-use-git-ls-files` to `--use-git-ls-files` because I screwed this up. --- lib/code_ownership/cli.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/code_ownership/cli.rb b/lib/code_ownership/cli.rb index ab3e6aa..7042e61 100644 --- a/lib/code_ownership/cli.rb +++ b/lib/code_ownership/cli.rb @@ -48,7 +48,7 @@ def self.validate!(argv) options[:skip_stage] = true end - opts.on('-use-git-ls-files', + opts.on('--use-git-ls-files', 'Use git ls-files for findinging .codeowner files instead of globs. Will be faster if in a git context.') do CodeOwnership::Configuration.use_git_ls_files = true end From 4c75a1894eca43a2f798673e728b43e9f9442f49 Mon Sep 17 00:00:00 2001 From: Gregor MacDougall Date: Tue, 25 Jun 2024 14:44:27 -0400 Subject: [PATCH 5/5] Pass use_git_ls_files on to config This passes the argument from the CLI onto the configuration to be loaded into the configuration through semi-hacky means. --- lib/code_ownership.rb | 5 ++++- lib/code_ownership/cli.rb | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/code_ownership.rb b/lib/code_ownership.rb index 9ffb54a..62561cb 100644 --- a/lib/code_ownership.rb +++ b/lib/code_ownership.rb @@ -85,14 +85,17 @@ def self.remove_file_annotation!(filename) autocorrect: T::Boolean, stage_changes: T::Boolean, files: T.nilable(T::Array[String]), + use_git_ls_files: T::Boolean, ).void end def validate!( autocorrect: true, stage_changes: true, - files: nil + files: nil, + use_git_ls_files: false ) Private.load_configuration! + configuration.use_git_ls_files = use_git_ls_files tracked_file_subset = if files files.select{|f| Private.file_tracked?(f)} diff --git a/lib/code_ownership/cli.rb b/lib/code_ownership/cli.rb index 7042e61..28a523b 100644 --- a/lib/code_ownership/cli.rb +++ b/lib/code_ownership/cli.rb @@ -50,7 +50,7 @@ def self.validate!(argv) opts.on('--use-git-ls-files', 'Use git ls-files for findinging .codeowner files instead of globs. Will be faster if in a git context.') do - CodeOwnership::Configuration.use_git_ls_files = true + options[:use_git_ls_files] = true end opts.on('--help', 'Shows this prompt') do @@ -72,7 +72,8 @@ def self.validate!(argv) CodeOwnership.validate!( files: files, autocorrect: !options[:skip_autocorrect], - stage_changes: !options[:skip_stage] + stage_changes: !options[:skip_stage], + use_git_ls_files: !!options[:use_git_ls_files] ) end