From 769e4580f3dbef122cddb070f084c22f76d3a2b5 Mon Sep 17 00:00:00 2001 From: Armando Montanez Date: Thu, 23 Jan 2025 13:13:11 -0800 Subject: [PATCH] pw_{cli,presubmit}: Split out file discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Splits out the file discovery logic of pw format to be library-ified to make it sharable. Bug: 326309165 Change-Id: I1a68348f7b7cd7e2adf3e8a8d6498f6ae783bf30 Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/261723 Pigweed-Auto-Submit: Armando Montanez Commit-Queue: Auto-Submit Lint: Lint 🤖 Presubmit-Verified: CQ Bot Account Reviewed-by: Wyatt Hepler --- pw_cli/api.rst | 6 + pw_cli/py/BUILD.bazel | 1 + pw_cli/py/BUILD.gn | 1 + pw_cli/py/pw_cli/collect_files.py | 122 ++++++++++++++++++++ pw_presubmit/py/pw_presubmit/format_code.py | 42 ++----- pw_presubmit/py/pw_presubmit/keep_sorted.py | 46 ++------ 6 files changed, 152 insertions(+), 66 deletions(-) create mode 100644 pw_cli/py/pw_cli/collect_files.py diff --git a/pw_cli/api.rst b/pw_cli/api.rst index aa3852f7fa..69b73aad06 100644 --- a/pw_cli/api.rst +++ b/pw_cli/api.rst @@ -6,6 +6,12 @@ API Reference .. pigweed-module-subpage:: :name: pw_cli +-------------------- +pw_cli.collect_files +-------------------- +.. automodule:: pw_cli.collect_files + :members: + ----------------- pw_cli.decorators ----------------- diff --git a/pw_cli/py/BUILD.bazel b/pw_cli/py/BUILD.bazel index 4025421349..bf91871d1d 100644 --- a/pw_cli/py/BUILD.bazel +++ b/pw_cli/py/BUILD.bazel @@ -29,6 +29,7 @@ py_library( "pw_cli/argument_types.py", "pw_cli/arguments.py", "pw_cli/branding.py", + "pw_cli/collect_files.py", "pw_cli/color.py", "pw_cli/decorators.py", "pw_cli/diff.py", diff --git a/pw_cli/py/BUILD.gn b/pw_cli/py/BUILD.gn index 353319d0f3..9d760f7749 100644 --- a/pw_cli/py/BUILD.gn +++ b/pw_cli/py/BUILD.gn @@ -30,6 +30,7 @@ pw_python_package("py") { "pw_cli/argument_types.py", "pw_cli/arguments.py", "pw_cli/branding.py", + "pw_cli/collect_files.py", "pw_cli/color.py", "pw_cli/decorators.py", "pw_cli/diff.py", diff --git a/pw_cli/py/pw_cli/collect_files.py b/pw_cli/py/pw_cli/collect_files.py new file mode 100644 index 0000000000..03104896c1 --- /dev/null +++ b/pw_cli/py/pw_cli/collect_files.py @@ -0,0 +1,122 @@ +# Copyright 2024 The Pigweed Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +"""Utilities for file collection in a repository.""" + +import logging +from pathlib import Path +from typing import Collection, Pattern, Sequence + +from pw_cli.tool_runner import ToolRunner +from pw_cli.file_filter import exclude_paths +from pw_cli.git_repo import ( + describe_git_pattern, + find_git_repo, + GitError, + TRACKING_BRANCH_ALIAS, +) + + +_LOG = logging.getLogger(__name__) + + +def collect_files_in_current_repo( + pathspecs: Collection[Path | str], + tool_runner: ToolRunner, + modified_since_git_ref: str | None = None, + exclude_patterns: Collection[Pattern] = tuple(), + action_flavor_text: str = 'Collecting', +) -> Sequence[Path]: + """Collects files given a variety of pathspecs and maps them to their repo. + + This is a relatively fuzzy file finder for projects tracked in a Git repo. + It's designed to adhere to the following constraints: + + * If a pathspec is a real file, unconditionally return it. + * If no pathspecs are passed, collect from the current working directory. + * Return the path of any files modified since `modified_since_git_ref` + (which may be a branch, tag, or commit) that match the provided + pathspecs. + * Passing no pathspecs has the same behavior as passing `.` (everything in + the current directory). + + Args: + pathspecs: Files or git pathspecs to collect files from. Wildcards (e.g. + `pw_cl*`) are accepted. + tool_runner: The ToolRunner to use for Git operations. + modified_since_git_ref: If the passed pathspec is tracked by a git repo, + it is excluded if unmodified since the specified pathspec. If the + pathspec is `None`, no files are excluded. + exclude_patterns: A collection of exclude patterns to exclude from the + set of collected files. + action_flavor_text: Replaces "Collecting" in the + "Collecting all files in the foo repo" log message with a + personalized string (e.g. "Formatting all files..."). + + Returns: + A dictionary mapping a GitRepo to a list of paths relative to that + repo's root that match the provided pathspecs. Files not tracked by + any git repo are mapped to the `None` key. + """ + # TODO: https://pwbug.dev/391690594 - This is brittle and not covered by + # tests. Someday it should be re-thought, particularly to better handle + # multi-repo setups. + files = [Path(path).resolve() for path in pathspecs if Path(path).is_file()] + try: + current_repo = find_git_repo(Path.cwd(), tool_runner) + except GitError: + current_repo = None + + # If this is a Git repo, list the original paths with git ls-files or diff. + if current_repo is not None: + # Implement a graceful fallback in case the tracking branch isn't + # available. + if ( + modified_since_git_ref == TRACKING_BRANCH_ALIAS + and not current_repo.tracking_branch() + ): + _LOG.warning( + 'Failed to determine the tracking branch, using --base HEAD~1 ' + 'instead of listing all files' + ) + modified_since_git_ref = 'HEAD~1' + + _LOG.info( + '%s %s', + action_flavor_text, + describe_git_pattern( + Path.cwd(), + modified_since_git_ref, + pathspecs, + exclude_patterns, + tool_runner, + current_repo.root(), + ), + ) + + # Add files from Git and remove duplicates. + files = sorted( + set( + exclude_paths( + exclude_patterns, + current_repo.list_files(modified_since_git_ref, pathspecs), + ) + ) + | set(files) + ) + elif modified_since_git_ref: + _LOG.critical( + 'A base commit may only be provided if running from a Git repo' + ) + + return files diff --git a/pw_presubmit/py/pw_presubmit/format_code.py b/pw_presubmit/py/pw_presubmit/format_code.py index b54c22218b..b42db4a8cb 100755 --- a/pw_presubmit/py/pw_presubmit/format_code.py +++ b/pw_presubmit/py/pw_presubmit/format_code.py @@ -41,10 +41,11 @@ TextIO, ) +from pw_cli.collect_files import collect_files_in_current_repo import pw_cli.color from pw_cli.diff import colorize_diff import pw_cli.env -from pw_cli.file_filter import FileFilter, exclude_paths +from pw_cli.file_filter import FileFilter from pw_cli.plural import plural import pw_env_setup.config_file from pw_presubmit.presubmit import filter_paths @@ -820,39 +821,18 @@ def format_paths_in_repo( ) -> int: """Checks or fixes formatting for files in a Git repo.""" - files = [Path(path).resolve() for path in paths if os.path.isfile(path)] repo = git_repo.root() if git_repo.is_repo() else None - # Implement a graceful fallback in case the tracking branch isn't available. - if base == git_repo.TRACKING_BRANCH_ALIAS and not git_repo.tracking_branch( - repo - ): - _LOG.warning( - 'Failed to determine the tracking branch, using --base HEAD~1 ' - 'instead of listing all files' - ) - base = 'HEAD~1' - - # If this is a Git repo, list the original paths with git ls-files or diff. - if repo: - project_root = pw_cli.env.pigweed_environment().PW_PROJECT_ROOT - _LOG.info( - 'Formatting %s', - git_repo.describe_files( - repo, Path.cwd(), base, paths, exclude, project_root - ), - ) + files = collect_files_in_current_repo( + paths, + PresubmitToolRunner(), + modified_since_git_ref=base, + exclude_patterns=exclude, + action_flavor_text='Formatting', + ) - # Add files from Git and remove duplicates. - files = sorted( - set(exclude_paths(exclude, git_repo.list_files(base, paths))) - | set(files) - ) - elif base: - _LOG.critical( - 'A base commit may only be provided if running from a Git repo' - ) - return 1 + # The format tooling currently expects absolute paths when filtering paths. + files = [Path.cwd() / f for f in files] return format_files( files, diff --git a/pw_presubmit/py/pw_presubmit/keep_sorted.py b/pw_presubmit/py/pw_presubmit/keep_sorted.py index 0a9fa2ca33..a09213c553 100644 --- a/pw_presubmit/py/pw_presubmit/keep_sorted.py +++ b/pw_presubmit/py/pw_presubmit/keep_sorted.py @@ -17,7 +17,6 @@ import dataclasses import difflib import logging -import os from pathlib import Path import re import sys @@ -29,10 +28,10 @@ ) import pw_cli +from pw_cli.collect_files import collect_files_in_current_repo from pw_cli.diff import colorize_diff -from pw_cli.file_filter import exclude_paths from pw_cli.plural import plural -from . import cli, git_repo, presubmit, presubmit_context +from . import cli, git_repo, presubmit, presubmit_context, tools DEFAULT_PATH = Path('out', 'presubmit', 'keep_sorted') @@ -59,7 +58,7 @@ @dataclasses.dataclass class KeepSortedContext: - paths: list[Path] + paths: Sequence[Path] fix: bool output_dir: Path failure_summary_log: Path @@ -431,39 +430,16 @@ def keep_sorted_in_repo( ) -> int: """Checks or fixes keep-sorted blocks for files in a Git repo.""" - files = [Path(path).resolve() for path in paths if os.path.isfile(path)] + project_root = pw_cli.env.project_root() repo = git_repo.root() if git_repo.is_repo() else None - # Implement a graceful fallback in case the tracking branch isn't available. - if base == git_repo.TRACKING_BRANCH_ALIAS and not git_repo.tracking_branch( - repo - ): - _LOG.warning( - 'Failed to determine the tracking branch, using --base HEAD~1 ' - 'instead of listing all files' - ) - base = 'HEAD~1' - - # If this is a Git repo, list the original paths with git ls-files or diff. - project_root = pw_cli.env.pigweed_environment().PW_PROJECT_ROOT - if repo: - _LOG.info( - 'Sorting %s', - git_repo.describe_files( - repo, Path.cwd(), base, paths, exclude, project_root - ), - ) - - # Add files from Git and remove duplicates. - files = sorted( - set(exclude_paths(exclude, git_repo.list_files(base, paths))) - | set(files) - ) - elif base: - _LOG.critical( - 'A base commit may only be provided if running from a Git repo' - ) - return 1 + files = collect_files_in_current_repo( + paths, + tools.PresubmitToolRunner(), + modified_since_git_ref=base, + exclude_patterns=exclude, + action_flavor_text='Sorting', + ) outdir: Path if output_directory: