Skip to content

Commit

Permalink
Merge pull request #34 from fabianhe/dev
Browse files Browse the repository at this point in the history
Release 0.7.0
  • Loading branch information
fabianhe authored Jul 14, 2021
2 parents 2780e5c + e2424b0 commit b9a500d
Show file tree
Hide file tree
Showing 46 changed files with 950 additions and 598 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,5 @@ jobs:
run: poetry install --no-interaction
- name: Run pre-commit hooks
run: pre-commit run --all-files
- name: Run tests
run: poetry run pytest
- name: Run pre-push hooks
run: pre-commit run --hook-stage push --all-files
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,8 @@ dmypy.json

# macOS
.DS_Store

# Other
measurements
experimental
executables
11 changes: 10 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,13 @@ repos:
language: system
pass_filenames: false
always_run: true
entry: poetry run typer pyrepositoryminer.main utils docs --name pyrepositoryminer --output DOCS.md
entry: poetry run typer pyrepositoryminer utils docs --name pyrepositoryminer --output DOCS.md
- repo: local
hooks:
- id: pytest
name: pytest
entry: pytest
language: system
pass_filenames: false
always_run: true
stages: [push]
10 changes: 6 additions & 4 deletions DOCS.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,18 @@ Either provide the commit ids to analyze on stdin or as a file argument.
**Usage**:

```console
$ pyrepositoryminer analyze [OPTIONS] REPOSITORY [COMMITS] [METRICS]:[blobcount|complexity|halstead|linecount|linelength|loc|maintainability|nesting|pylinecount|raw|tokei]...
$ pyrepositoryminer analyze [OPTIONS] REPOSITORY [METRICS]:[blobcount|cacherate|complexity|difftokei|halstead|linecount|linelength|loc|maintainability|nesting|pylinecount|raw|seerene|tokei|touchedlines]...
```

**Arguments**:

* `REPOSITORY`: The path to the bare repository. [required]
* `[COMMITS]`: The newline-separated input file of commit ids. Commit ids are read from stdin if this is not passed. [default: -]
* `[METRICS]:[blobcount|complexity|halstead|linecount|linelength|loc|maintainability|nesting|pylinecount|raw|tokei]...`
* `[METRICS]:[blobcount|cacherate|complexity|difftokei|halstead|linecount|linelength|loc|maintainability|nesting|pylinecount|raw|seerene|tokei|touchedlines]...`

**Options**:

* `--commits FILENAME`: The newline-separated input file of commit ids. Commit ids are read from stdin if this is not passed.
* `--custom-metrics TEXT`: [default: ]
* `--workers INTEGER`: [default: 1]
* `--help`: Show this message and exit.

Expand Down Expand Up @@ -81,6 +82,7 @@ $ pyrepositoryminer clone [OPTIONS] URL PATH

**Options**:

* `--bare / --no-bare`: [default: True]
* `--help`: Show this message and exit.

## `pyrepositoryminer commits`
Expand All @@ -103,7 +105,7 @@ $ pyrepositoryminer commits [OPTIONS] REPOSITORY [BRANCHES]
**Options**:

* `--simplify-first-parent / --no-simplify-first-parent`: [default: True]
* `--drop-duplicates / --no-drop-duplicates`: [default: False]
* `--drop-duplicates / --no-drop-duplicates`: [default: True]
* `--sort [topological|time|none]`: [default: topological]
* `--sort-reverse / --no-sort-reverse`: [default: True]
* `--limit INTEGER`
Expand Down
15 changes: 14 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
FROM python:latest

RUN pip install pyrepositoryminer
WORKDIR /app

RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | POETRY_HOME=/opt/poetry python && \
cd /usr/local/bin && \
ln -s /opt/poetry/bin/poetry && \
poetry config virtualenvs.create false

COPY ./pyproject.toml ./poetry.lock README.md /app/
RUN poetry install --no-root --no-dev --no-interaction --no-ansi
COPY ./pyrepositoryminer /app/pyrepositoryminer
COPY ./executables /app/executables
ENV EXECUTABLES=/app/executables
ENV PYTHONPATH=/app
RUN pip install .
238 changes: 121 additions & 117 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pyrepositoryminer"
version = "0.6.0"
version = "0.7.0"
description = "Efficient Repository Mining in Python"
license = "GPL-3.0-or-later"
authors = ["Fabian Heseding <[email protected]>"]
Expand All @@ -10,7 +10,7 @@ repository = "https://github.com/fabianhe/pyrepositoryminer"
documentation = "https://github.com/fabianhe/pyrepositoryminer/blob/master/DOCS.md"

[tool.poetry.scripts]
pyrepositoryminer = "pyrepositoryminer.main:app"
pyrepositoryminer = "pyrepositoryminer:app"

[tool.poetry.dependencies]
python = "^3.8"
Expand Down
14 changes: 13 additions & 1 deletion pyrepositoryminer/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,13 @@
__version__ = "0.6.0"
__version__ = "0.7.0"

from typer import Typer

from pyrepositoryminer.commands import analyze, branch, clone, commits

# The command-line application; each command function below is exposed as a
# sub-command named after the function.
app = Typer(help="Efficient Repository Mining in Python.")
for _command in (analyze, branch, clone, commits):
    app.command()(_command)
del _command  # keep the module namespace free of the loop variable

__all__ = ("app",)
2 changes: 1 addition & 1 deletion pyrepositoryminer/__main__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from pyrepositoryminer.main import app
from pyrepositoryminer import app

# Run the Typer application, passing an explicit program name for its output.
app(prog_name="pyrepositoryminer")
81 changes: 57 additions & 24 deletions pyrepositoryminer/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
from asyncio import run
from asyncio.tasks import as_completed
from pathlib import Path
from typing import Any, Awaitable, Iterable, NamedTuple, Optional, Tuple
from typing import Any, Awaitable, Iterable, List, NamedTuple, Optional, Tuple

from pygit2 import Commit, Repository
from uvloop import install

from pyrepositoryminer.metrics import all_metrics
from pyrepositoryminer.metrics.diffdir.main import DiffDirMetric, DiffDirVisitor
from pyrepositoryminer.metrics.dir.main import DirMetric, DirVisitor
from pyrepositoryminer.metrics.nativeblob.main import (
NativeBlobMetric,
Expand All @@ -22,12 +23,13 @@
)
from pyrepositoryminer.metrics.structs import Metric
from pyrepositoryminer.output import CommitOutput, format_output, parse_commit
from pyrepositoryminer.visitableobject import VisitableObject
from pyrepositoryminer.pobjects import Object


class InitArgs(NamedTuple):
    """Arguments consumed by ``initialize`` to set up the module-level state."""

    # Path passed to ``pygit2.Repository`` when the repository is opened.
    repository: Path
    # Names of built-in metrics; each one is looked up in ``all_metrics``.
    metrics: Tuple[str, ...]
    # User-supplied metric classes; ``initialize`` sorts them by base class
    # with ``issubclass`` and instantiates them without arguments.
    custom_metrics: Tuple[Any, ...]  # TODO make this a metric abc


repo: Repository
Expand All @@ -37,6 +39,8 @@ class InitArgs(NamedTuple):
native_tree_visitor: NativeTreeVisitor
dir_metrics: Tuple[Any, ...]
dir_visitor: DirVisitor
diffdir_visitor: DiffDirVisitor
diffdir_metrics: Tuple[Any, ...]


async def categorize_metrics(
Expand Down Expand Up @@ -65,19 +69,29 @@ async def analyze(commit_id: str) -> Optional[CommitOutput]:
else:
if commit is None or not isinstance(commit, Commit):
return None
root = VisitableObject.from_object(commit)
futures = [
m(blob_tup)
async for blob_tup in native_blob_visitor(root)
for m in native_blob_metrics
if not (await m.filter(blob_tup))
]
tree_tup = await native_tree_visitor(root)
futures.extend(m(tree_tup) for m in native_tree_metrics)
await dir_visitor(root)
async with dir_visitor:
futures.extend(m(dir_visitor.dir_tup) for m in dir_metrics)
mets = await categorize_metrics(*futures)
root = Object.from_pobject(commit)
futures: List[Awaitable[Iterable[Metric]]] = []
if native_blob_metrics:
futures.extend(
m(blob_tup)
for blob_tup in native_blob_visitor(root)
for m in native_blob_metrics
if not m.filter(blob_tup)
)
if native_tree_metrics:
tree_tup = native_tree_visitor(root)
futures.extend(m(tree_tup) for m in native_tree_metrics)
if dir_metrics:
dir_tup = dir_visitor(root)
futures.extend(m(dir_tup) for m in dir_metrics)
if diffdir_metrics:
diffdir_tup = diffdir_visitor(root)
futures.extend(m(diffdir_tup) for m in diffdir_metrics)
mets = await categorize_metrics(*futures)
if dir_metrics:
dir_visitor.close()
if diffdir_metrics:
diffdir_visitor.close()
return parse_commit(commit, *mets)


Expand All @@ -87,25 +101,44 @@ def initialize(init_args: InitArgs) -> None:
global native_blob_metrics, native_blob_visitor
global native_tree_metrics, native_tree_visitor
global dir_metrics, dir_visitor
global diffdir_metrics, diffdir_visitor
repo = Repository(init_args.repository)
native_blob_metrics = tuple(
all_metrics[m]()
for m in init_args.metrics
if issubclass(all_metrics[m], NativeBlobMetric)
[
all_metrics[m]()
for m in init_args.metrics
if issubclass(all_metrics[m], NativeBlobMetric)
]
+ [m() for m in init_args.custom_metrics if issubclass(m, NativeBlobMetric)]
)
native_blob_visitor = NativeBlobVisitor()
native_tree_metrics = tuple(
all_metrics[m]()
for m in init_args.metrics
if issubclass(all_metrics[m], NativeTreeMetric)
[
all_metrics[m]()
for m in init_args.metrics
if issubclass(all_metrics[m], NativeTreeMetric)
]
+ [m() for m in init_args.custom_metrics if issubclass(m, NativeTreeMetric)]
)
native_tree_visitor = NativeTreeVisitor()
dir_metrics = tuple(
all_metrics[m]()
for m in init_args.metrics
if issubclass(all_metrics[m], DirMetric)
[
all_metrics[m]()
for m in init_args.metrics
if issubclass(all_metrics[m], DirMetric)
]
+ [m() for m in init_args.custom_metrics if issubclass(m, DirMetric)]
)
dir_visitor = DirVisitor(repo)
diffdir_metrics = tuple(
[
all_metrics[m]()
for m in init_args.metrics
if issubclass(all_metrics[m], DiffDirMetric)
]
+ [m() for m in init_args.custom_metrics if issubclass(m, DiffDirMetric)]
)
diffdir_visitor = DiffDirVisitor(repo)


def worker(commit_id: str) -> Optional[str]:
Expand Down
6 changes: 6 additions & 0 deletions pyrepositoryminer/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from pyrepositoryminer.commands.analyze import analyze
from pyrepositoryminer.commands.branch import branch
from pyrepositoryminer.commands.clone import clone
from pyrepositoryminer.commands.commits import commits

__all__ = ("analyze", "branch", "clone", "commits")
81 changes: 81 additions & 0 deletions pyrepositoryminer/commands/analyze.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from enum import Enum
from functools import reduce
from importlib import import_module
from inspect import isclass
from multiprocessing import Pool
from pathlib import Path
from sys import stdin
from typing import List, Optional

from typer import Abort, Argument, Option, echo
from typer.models import FileText

from pyrepositoryminer.analyze import InitArgs, initialize, worker
from pyrepositoryminer.metrics import all_metrics
from pyrepositoryminer.metrics.dir.main import DirMetric
from pyrepositoryminer.metrics.nativeblob.main import NativeBlobMetric
from pyrepositoryminer.metrics.nativetree.main import NativeTreeMetric

# Enum with one member per built-in metric name (member name == member value),
# in alphabetical order. It is the element type of the ``metrics`` CLI argument.
AvailableMetrics = Enum(  # type: ignore
    # https://github.com/python/mypy/issues/5317
    "AvailableMetrics",
    [(name, name) for name in sorted(all_metrics)],
)


def import_metric(import_str: str):  # type: ignore  # TODO make this a metric abc
    """Resolve a ``"<module>:<attribute>"`` string to a custom metric class.

    Args:
        import_str: Import path of the metric class. The part before the colon
            is an importable module name, the part after it a (possibly
            dotted) attribute path inside that module.

    Returns:
        The class found at the given location. It is returned uninstantiated.

    Raises:
        Abort: If the string is malformed, the module or attribute cannot be
            found, or the attribute is not a class deriving from one of the
            pyrepositoryminer metric base classes. A message explaining the
            problem is echoed first.
        ImportError: If the module itself was found but one of *its* imports
            failed; that error is re-raised unchanged.
    """
    # Imported here so this function carries its own dependency; the analysis
    # module accepts custom DiffDirMetric subclasses, so they must pass the
    # validation below as well.
    from pyrepositoryminer.metrics.diffdir.main import DiffDirMetric

    module_str, _, attrs_str = import_str.partition(":")
    if not module_str or not attrs_str:
        echo(f'Import string "{import_str}" must be in format "<module>:<attribute>"')
        raise Abort()
    try:
        module = import_module(module_str)
    except ImportError as e:
        # Only translate "module not found" for the requested module; an
        # ImportError raised from inside that module is a real bug to surface.
        if e.name != module_str:
            raise e from None
        echo(f'Could not import module "{module_str}"')
        raise Abort()
    try:
        # Walk the dotted attribute path starting at the module object.
        instance = reduce(getattr, (module, *attrs_str.split(".")))  # type: ignore
    except AttributeError:
        echo(f'Attribute "{attrs_str}" not found in module "{module_str}"')
        raise Abort()
    if not isclass(instance):
        echo(f'Instance "{instance}" must be a class')
        raise Abort()
    parents = (NativeBlobMetric, NativeTreeMetric, DirMetric, DiffDirMetric)
    if not issubclass(instance, parents):  # type: ignore
        echo(f'Instance "{instance}" must subclass a pyrepositoryminer metric class')
        raise Abort()
    return instance


def analyze(
    repository: Path = Argument(..., help="The path to the bare repository."),
    metrics: Optional[List[AvailableMetrics]] = Argument(None, case_sensitive=False),
    commits: Optional[FileText] = Option(
        None,
        help="The newline-separated input file of commit ids. Commit ids are read from stdin if this is not passed.",  # noqa: E501
    ),
    custom_metrics: List[str] = Option([]),
    workers: int = 1,
) -> None:
    """Analyze commits of a repository.
    Either provide the commit ids to analyze on stdin or as a file argument."""
    # No positional metrics means "no built-in metrics".
    chosen = metrics if metrics else []
    # Commit ids are consumed lazily, one stripped line at a time.
    commit_source = commits if commits else stdin
    commit_ids = (line.strip() for line in commit_source)
    # Built once in the parent process; every pool worker receives it through
    # the ``initialize`` initializer.
    init_args = InitArgs(
        repository,
        tuple({metric.value for metric in chosen} & all_metrics.keys()),
        tuple(map(import_metric, set(custom_metrics))),
    )
    with Pool(max(workers, 1), initialize, (init_args,)) as pool:
        # ``imap`` yields results in input order; workers return None for
        # commits that produced no output, which are skipped.
        for result in pool.imap(worker, commit_ids):
            if result is not None:
                echo(result)
23 changes: 23 additions & 0 deletions pyrepositoryminer/commands/branch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from pathlib import Path
from typing import Iterable

from pygit2 import Repository
from typer import Argument, echo


def branch(
    repository: Path = Argument(..., help="The path to the bare repository."),
    local: bool = True,
    remote: bool = False,
) -> None:
    """Get the branches of a repository."""
    repo = Repository(repository)
    # Start with nothing selected: when both flags are off (``--no-local``
    # without ``--remote``) there is simply nothing to print. Without this
    # default the loop below would hit an unbound local variable.
    branches: Iterable[str] = ()
    if local and remote:
        branches = repo.branches
    elif local:
        branches = repo.branches.local
    elif remote:
        branches = repo.branches.remote
    # One branch name per output line.
    for branch_name in branches:
        echo(branch_name)
8 changes: 8 additions & 0 deletions pyrepositoryminer/commands/clone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from pathlib import Path

from pygit2 import clone_repository


def clone(url: str, path: Path, bare: bool = True) -> None:
    """Clone a repository to a path."""
    # Delegates entirely to pygit2; ``bare`` is forwarded unchanged.
    clone_repository(url=url, path=path, bare=bare)
Loading

0 comments on commit b9a500d

Please sign in to comment.