Skip to content

Commit

Permalink
pw_{cli,presubmit}: Split out file discovery
Browse files Browse the repository at this point in the history
Splits out the file discovery logic of pw format to be library-ified to
make it sharable.

Bug: 326309165
Change-Id: I1a68348f7b7cd7e2adf3e8a8d6498f6ae783bf30
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/261723
Pigweed-Auto-Submit: Armando Montanez <[email protected]>
Commit-Queue: Auto-Submit <[email protected]>
Lint: Lint 🤖 <[email protected]>
Presubmit-Verified: CQ Bot Account <[email protected]>
Reviewed-by: Wyatt Hepler <[email protected]>
  • Loading branch information
armandomontanez authored and CQ Bot Account committed Jan 23, 2025
1 parent 1c6be85 commit 769e458
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 66 deletions.
6 changes: 6 additions & 0 deletions pw_cli/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ API Reference
.. pigweed-module-subpage::
:name: pw_cli

--------------------
pw_cli.collect_files
--------------------
.. automodule:: pw_cli.collect_files
:members:

-----------------
pw_cli.decorators
-----------------
Expand Down
1 change: 1 addition & 0 deletions pw_cli/py/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ py_library(
"pw_cli/argument_types.py",
"pw_cli/arguments.py",
"pw_cli/branding.py",
"pw_cli/collect_files.py",
"pw_cli/color.py",
"pw_cli/decorators.py",
"pw_cli/diff.py",
Expand Down
1 change: 1 addition & 0 deletions pw_cli/py/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ pw_python_package("py") {
"pw_cli/argument_types.py",
"pw_cli/arguments.py",
"pw_cli/branding.py",
"pw_cli/collect_files.py",
"pw_cli/color.py",
"pw_cli/decorators.py",
"pw_cli/diff.py",
Expand Down
122 changes: 122 additions & 0 deletions pw_cli/py/pw_cli/collect_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Copyright 2024 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Utilities for file collection in a repository."""

import logging
from pathlib import Path
from typing import Collection, Pattern, Sequence

from pw_cli.tool_runner import ToolRunner
from pw_cli.file_filter import exclude_paths
from pw_cli.git_repo import (
describe_git_pattern,
find_git_repo,
GitError,
TRACKING_BRANCH_ALIAS,
)


_LOG = logging.getLogger(__name__)


def collect_files_in_current_repo(
pathspecs: Collection[Path | str],
tool_runner: ToolRunner,
modified_since_git_ref: str | None = None,
exclude_patterns: Collection[Pattern] = tuple(),
action_flavor_text: str = 'Collecting',
) -> Sequence[Path]:
"""Collects files given a variety of pathspecs and maps them to their repo.
This is a relatively fuzzy file finder for projects tracked in a Git repo.
It's designed to adhere to the following constraints:
* If a pathspec is a real file, unconditionally return it.
* If no pathspecs are passed, collect from the current working directory.
* Return the path of any files modified since `modified_since_git_ref`
(which may be a branch, tag, or commit) that match the provided
pathspecs.
* Passing no pathspecs has the same behavior as passing `.` (everything in
the current directory).
Args:
pathspecs: Files or git pathspecs to collect files from. Wildcards (e.g.
`pw_cl*`) are accepted.
tool_runner: The ToolRunner to use for Git operations.
modified_since_git_ref: If the passed pathspec is tracked by a git repo,
it is excluded if unmodified since the specified pathspec. If the
pathspec is `None`, no files are excluded.
exclude_patterns: A collection of exclude patterns to exclude from the
set of collected files.
action_flavor_text: Replaces "Collecting" in the
"Collecting all files in the foo repo" log message with a
personalized string (e.g. "Formatting all files...").
Returns:
A dictionary mapping a GitRepo to a list of paths relative to that
repo's root that match the provided pathspecs. Files not tracked by
any git repo are mapped to the `None` key.
"""
# TODO: https://pwbug.dev/391690594 - This is brittle and not covered by
# tests. Someday it should be re-thought, particularly to better handle
# multi-repo setups.
files = [Path(path).resolve() for path in pathspecs if Path(path).is_file()]
try:
current_repo = find_git_repo(Path.cwd(), tool_runner)
except GitError:
current_repo = None

# If this is a Git repo, list the original paths with git ls-files or diff.
if current_repo is not None:
# Implement a graceful fallback in case the tracking branch isn't
# available.
if (
modified_since_git_ref == TRACKING_BRANCH_ALIAS
and not current_repo.tracking_branch()
):
_LOG.warning(
'Failed to determine the tracking branch, using --base HEAD~1 '
'instead of listing all files'
)
modified_since_git_ref = 'HEAD~1'

_LOG.info(
'%s %s',
action_flavor_text,
describe_git_pattern(
Path.cwd(),
modified_since_git_ref,
pathspecs,
exclude_patterns,
tool_runner,
current_repo.root(),
),
)

# Add files from Git and remove duplicates.
files = sorted(
set(
exclude_paths(
exclude_patterns,
current_repo.list_files(modified_since_git_ref, pathspecs),
)
)
| set(files)
)
elif modified_since_git_ref:
_LOG.critical(
'A base commit may only be provided if running from a Git repo'
)

return files
42 changes: 11 additions & 31 deletions pw_presubmit/py/pw_presubmit/format_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,11 @@
TextIO,
)

from pw_cli.collect_files import collect_files_in_current_repo
import pw_cli.color
from pw_cli.diff import colorize_diff
import pw_cli.env
from pw_cli.file_filter import FileFilter, exclude_paths
from pw_cli.file_filter import FileFilter
from pw_cli.plural import plural
import pw_env_setup.config_file
from pw_presubmit.presubmit import filter_paths
Expand Down Expand Up @@ -820,39 +821,18 @@ def format_paths_in_repo(
) -> int:
"""Checks or fixes formatting for files in a Git repo."""

files = [Path(path).resolve() for path in paths if os.path.isfile(path)]
repo = git_repo.root() if git_repo.is_repo() else None

# Implement a graceful fallback in case the tracking branch isn't available.
if base == git_repo.TRACKING_BRANCH_ALIAS and not git_repo.tracking_branch(
repo
):
_LOG.warning(
'Failed to determine the tracking branch, using --base HEAD~1 '
'instead of listing all files'
)
base = 'HEAD~1'

# If this is a Git repo, list the original paths with git ls-files or diff.
if repo:
project_root = pw_cli.env.pigweed_environment().PW_PROJECT_ROOT
_LOG.info(
'Formatting %s',
git_repo.describe_files(
repo, Path.cwd(), base, paths, exclude, project_root
),
)
files = collect_files_in_current_repo(
paths,
PresubmitToolRunner(),
modified_since_git_ref=base,
exclude_patterns=exclude,
action_flavor_text='Formatting',
)

# Add files from Git and remove duplicates.
files = sorted(
set(exclude_paths(exclude, git_repo.list_files(base, paths)))
| set(files)
)
elif base:
_LOG.critical(
'A base commit may only be provided if running from a Git repo'
)
return 1
# The format tooling currently expects absolute paths when filtering paths.
files = [Path.cwd() / f for f in files]

return format_files(
files,
Expand Down
46 changes: 11 additions & 35 deletions pw_presubmit/py/pw_presubmit/keep_sorted.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import dataclasses
import difflib
import logging
import os
from pathlib import Path
import re
import sys
Expand All @@ -29,10 +28,10 @@
)

import pw_cli
from pw_cli.collect_files import collect_files_in_current_repo
from pw_cli.diff import colorize_diff
from pw_cli.file_filter import exclude_paths
from pw_cli.plural import plural
from . import cli, git_repo, presubmit, presubmit_context
from . import cli, git_repo, presubmit, presubmit_context, tools

DEFAULT_PATH = Path('out', 'presubmit', 'keep_sorted')

Expand All @@ -59,7 +58,7 @@

@dataclasses.dataclass
class KeepSortedContext:
paths: list[Path]
paths: Sequence[Path]
fix: bool
output_dir: Path
failure_summary_log: Path
Expand Down Expand Up @@ -431,39 +430,16 @@ def keep_sorted_in_repo(
) -> int:
"""Checks or fixes keep-sorted blocks for files in a Git repo."""

files = [Path(path).resolve() for path in paths if os.path.isfile(path)]
project_root = pw_cli.env.project_root()
repo = git_repo.root() if git_repo.is_repo() else None

# Implement a graceful fallback in case the tracking branch isn't available.
if base == git_repo.TRACKING_BRANCH_ALIAS and not git_repo.tracking_branch(
repo
):
_LOG.warning(
'Failed to determine the tracking branch, using --base HEAD~1 '
'instead of listing all files'
)
base = 'HEAD~1'

# If this is a Git repo, list the original paths with git ls-files or diff.
project_root = pw_cli.env.pigweed_environment().PW_PROJECT_ROOT
if repo:
_LOG.info(
'Sorting %s',
git_repo.describe_files(
repo, Path.cwd(), base, paths, exclude, project_root
),
)

# Add files from Git and remove duplicates.
files = sorted(
set(exclude_paths(exclude, git_repo.list_files(base, paths)))
| set(files)
)
elif base:
_LOG.critical(
'A base commit may only be provided if running from a Git repo'
)
return 1
files = collect_files_in_current_repo(
paths,
tools.PresubmitToolRunner(),
modified_since_git_ref=base,
exclude_patterns=exclude,
action_flavor_text='Sorting',
)

outdir: Path
if output_directory:
Expand Down

0 comments on commit 769e458

Please sign in to comment.