Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: automatically format pip install cell #219

Merged
merged 4 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@
"ipython",
"mkdir",
"mypy",
"oneline",
"pytest",
"PYTHONHASHSEED",
"repoma",
"sympy",
"toctree",
"Zenodo"
],
Expand Down
9 changes: 3 additions & 6 deletions src/repoma/colab_toc_visible.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from typing import Optional, Sequence

import nbformat
from nbformat import NotebookNode

from repoma.utilities.notebook import load_notebook

from .errors import PrecommitError
from .utilities.executor import Executor
Expand All @@ -30,7 +31,7 @@


def _update_metadata(path: str) -> None:
notebook = open_notebook(path)
notebook = load_notebook(path)

Check warning on line 34 in src/repoma/colab_toc_visible.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/colab_toc_visible.py#L34

Added line #L34 was not covered by tests
metadata = notebook["metadata"]
updated = False
if metadata.get("colab") is None:
Expand All @@ -46,9 +47,5 @@
raise PrecommitError(msg)


def open_notebook(path: str) -> NotebookNode:
return nbformat.read(path, as_version=nbformat.NO_CONVERT)


if __name__ == "__main__":
sys.exit(main())
12 changes: 5 additions & 7 deletions src/repoma/fix_nbformat_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

import nbformat

from repoma.utilities.notebook import load_notebook

from .errors import PrecommitError
from .utilities.executor import Executor

Expand All @@ -34,22 +36,22 @@


def set_nbformat_version(filename: str) -> None:
notebook = open_notebook(filename)
notebook = load_notebook(filename)

Check warning on line 39 in src/repoma/fix_nbformat_version.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/fix_nbformat_version.py#L39

Added line #L39 was not covered by tests
if notebook["nbformat_minor"] != 4: # noqa: PLR2004
notebook["nbformat_minor"] = 4
nbformat.write(notebook, filename)


def remove_cell_ids(filename: str) -> None:
notebook = open_notebook(filename)
notebook = load_notebook(filename)

Check warning on line 46 in src/repoma/fix_nbformat_version.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/fix_nbformat_version.py#L46

Added line #L46 was not covered by tests
for cell in notebook["cells"]:
if "id" in cell:
del cell["id"]
nbformat.write(notebook, filename)


def check_svg_output_cells(filename: str) -> None:
notebook = open_notebook(filename)
notebook = load_notebook(filename)

Check warning on line 54 in src/repoma/fix_nbformat_version.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/fix_nbformat_version.py#L54

Added line #L54 was not covered by tests
for i, cell in enumerate(notebook["cells"]):
for output in cell.get("outputs", []):
data = output.get("data", {})
Expand All @@ -66,9 +68,5 @@
)


def open_notebook(filename: str) -> dict:
return nbformat.read(filename, as_version=nbformat.NO_CONVERT)


if __name__ == "__main__":
sys.exit(main())
140 changes: 96 additions & 44 deletions src/repoma/pin_nb_requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,42 +7,43 @@
"""

import argparse
import re
import sys
from functools import lru_cache
from textwrap import dedent
from typing import List, Optional, Sequence

import nbformat
from nbformat import NotebookNode

from repoma.utilities.executor import Executor
from repoma.utilities.notebook import load_notebook

from .errors import PrecommitError

__PIP_INSTALL_STATEMENT = "%pip install -q "
__EXPECTED_PIP_INSTALL_LINE = "%pip install -q"


def check_pinned_requirements(filename: str) -> None:
notebook = nbformat.read(filename, as_version=nbformat.NO_CONVERT)
notebook = load_notebook(filename)

Check warning on line 28 in src/repoma/pin_nb_requirements.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/pin_nb_requirements.py#L28

Added line #L28 was not covered by tests
if not __has_python_kernel(notebook):
return
for cell in notebook["cells"]:
for cell_id, cell in enumerate(notebook["cells"]):
if cell["cell_type"] != "code":
continue
source: str = cell["source"]
src_lines = source.split("\n")
if len(src_lines) == 0:
continue
cell_content = "".join(s.strip("\\") for s in src_lines)
if not cell_content.startswith(__PIP_INSTALL_STATEMENT):
source = __to_oneline(cell["source"])
pip_requirements = extract_pip_requirements(source)

Check warning on line 35 in src/repoma/pin_nb_requirements.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/pin_nb_requirements.py#L34-L35

Added lines #L34 - L35 were not covered by tests
if pip_requirements is None:
continue
executor = Executor()
executor(__check_install_statement, filename, cell_content)
executor(__check_requirements, filename, cell_content)
executor(__update_metadata, filename, cell["metadata"], notebook)
executor(_check_pip_requirements, filename, pip_requirements)
executor(_format_pip_requirements, filename, source, notebook, cell_id)
executor(_update_metadata, filename, cell["metadata"], notebook)

Check warning on line 41 in src/repoma/pin_nb_requirements.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/pin_nb_requirements.py#L39-L41

Added lines #L39 - L41 were not covered by tests
executor.finalize()
return
msg = (
f'Notebook "{filename}" does not contain a pip install cell of the form'
f" {__PIP_INSTALL_STATEMENT}some-package==0.1.0 package2==3.2"
f" {__EXPECTED_PIP_INSTALL_LINE} some-package==0.1.0 package2==3.2"
)
raise PrecommitError(msg)

Expand All @@ -55,50 +56,101 @@
return "python" in kernel_language


def __check_install_statement(filename: str, install_statement: str) -> None:
if not install_statement.startswith(__PIP_INSTALL_STATEMENT):
msg = (
f"First shell cell in notebook {filename} does not start with"
f" {__PIP_INSTALL_STATEMENT}"
)
raise PrecommitError(msg)
if install_statement.endswith("/dev/null"):
msg = (
"Remove the /dev/null from the pip install statement in notebook"
f" {filename}"
)
raise PrecommitError(msg)
@lru_cache(maxsize=1)
def __to_oneline(source: str) -> str:
    """Collapse a multi-line cell source into a single line.

    Every line first loses its trailing whitespace and then any trailing
    backslashes (line-continuation characters). The remaining pieces are
    concatenated without a separator.
    """
    pieces = []
    for line in source.split("\n"):
        without_trailing_space = line.rstrip()
        pieces.append(without_trailing_space.rstrip("\\"))
    return "".join(pieces)


@lru_cache(maxsize=1)
def extract_pip_requirements(source: str) -> Optional[List[str]]:
    r"""Extract the requirements listed in a pip install statement.

    Returns `None` if the source is not recognized as a pip install
    statement, otherwise the list of (non-empty) space-separated entries that
    follow ``install`` and the optional ``-q`` flag. A trailing redirect to
    ``/dev/null`` is not part of the returned list.

    >>> extract_pip_requirements("Not a pip install statement")
    >>> extract_pip_requirements("pip install")
    []
    >>> extract_pip_requirements("pip3 install attrs")
    ['attrs']
    >>> extract_pip_requirements("pip3 install -q attrs")
    ['attrs']
    >>> extract_pip_requirements("pip3 install attrs &> /dev/null")
    ['attrs']
    >>> extract_pip_requirements("%pip install attrs numpy==1.24.4 ")
    ['attrs', 'numpy==1.24.4']
    >>> extract_pip_requirements("!python3 -mpip install sympy")
    ['sympy']
    >>> extract_pip_requirements('''
    ... python3 -m pip install \
    ...     attrs numpy \
    ...     sympy \
    ...     tensorflow
    ... ''')
    ['attrs', 'numpy', 'sympy', 'tensorflow']
    """
    # cspell:ignore mpip
    # Work on the single-line form so that continuation lines are parsed too.
    flattened = __to_oneline(source).strip()
    pattern = r"[%\!]?\s*(python3?\s+-m\s*)?pip3?\s+install\s*(-q)?(.*?)(&?>\s*/dev/null)?$"
    found = re.match(pattern, flattened)
    if found is None:
        return None
    # Group 3 holds everything between the install command and the redirect.
    requirements = []
    for token in found.group(3).split(" "):
        token = token.strip()
        if token:
            requirements.append(token)
    return requirements


def __check_requirements(filename: str, install_statement: str) -> None:
package_listing = install_statement.replace(__PIP_INSTALL_STATEMENT, "")
requirements = package_listing.split(" ")
def _check_pip_requirements(filename: str, requirements: List[str]) -> None:
if len(requirements) == 0:
msg = f'At least one dependency required in install cell of "{filename}"'
raise PrecommitError(msg)
for requirement in requirements:
requirement = requirement.strip()
if not requirement:
for req in requirements:
req = req.strip()

Check warning on line 107 in src/repoma/pin_nb_requirements.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/pin_nb_requirements.py#L107

Added line #L107 was not covered by tests
if not req:
continue
if "git+" in requirement:
if "git+" in req:
continue
if not any(equal_sign in requirement for equal_sign in ["==", "~="]):
unpinned_requirements = []

Check warning on line 112 in src/repoma/pin_nb_requirements.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/pin_nb_requirements.py#L112

Added line #L112 was not covered by tests
for req in requirements:
if req.startswith("git+"):
continue

Check warning on line 115 in src/repoma/pin_nb_requirements.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/pin_nb_requirements.py#L115

Added line #L115 was not covered by tests
if any(equal_sign in req for equal_sign in ["==", "~="]):
continue
package = req.split("<")[0].split(">")[0].strip()
unpinned_requirements.append(package)

Check warning on line 119 in src/repoma/pin_nb_requirements.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/pin_nb_requirements.py#L117-L119

Added lines #L117 - L119 were not covered by tests
if unpinned_requirements:
msg = (
f'Install cell in notebook "{filename}" contains a requirement without'
f" == or ~= ({requirement})"
f'Install cell in notebook "{filename}" contains requirements without'
"pinning (== or ~=):"
)
for req in unpinned_requirements:
msg += f"\n - {req}"
msg += dedent(f"""

Check warning on line 127 in src/repoma/pin_nb_requirements.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/pin_nb_requirements.py#L126-L127

Added lines #L126 - L127 were not covered by tests
Get the currently installed versions with:

python3 -m pip freeze | grep -iE '{"|".join(sorted(unpinned_requirements))}'
""")
raise PrecommitError(msg)
requirements_lower = [r.lower() for r in requirements if not r.startswith("git+")]
if sorted(requirements_lower) != requirements_lower:
sorted_requirements = " ".join(sorted(requirements))
msg = (
f'Requirements in notebook "{filename}" are not sorted alphabetically.'
f" Should be:\n\n {sorted_requirements}"
)


def _format_pip_requirements(
filename: str, install_statement: str, notebook: NotebookNode, cell_id: int
) -> None:
requirements = extract_pip_requirements(install_statement)

Check warning on line 138 in src/repoma/pin_nb_requirements.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/pin_nb_requirements.py#L138

Added line #L138 was not covered by tests
if requirements is None:
return

Check warning on line 140 in src/repoma/pin_nb_requirements.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/pin_nb_requirements.py#L140

Added line #L140 was not covered by tests
git_requirements = {r for r in requirements if r.startswith("git+")}
pip_requirements = set(requirements) - git_requirements

Check warning on line 142 in src/repoma/pin_nb_requirements.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/pin_nb_requirements.py#L142

Added line #L142 was not covered by tests
pip_requirements = {r.lower().replace("_", "-") for r in pip_requirements}
sorted_requirements = sorted(pip_requirements) + sorted(git_requirements)
expected = f"{__EXPECTED_PIP_INSTALL_LINE} {' '.join(sorted_requirements)}"

Check warning on line 145 in src/repoma/pin_nb_requirements.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/pin_nb_requirements.py#L144-L145

Added lines #L144 - L145 were not covered by tests
if install_statement != expected:
notebook["cells"][cell_id]["source"] = expected
nbformat.write(notebook, filename)
msg = f'Ordered and formatted pip install cell in "{filename}"'

Check warning on line 149 in src/repoma/pin_nb_requirements.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/pin_nb_requirements.py#L147-L149

Added lines #L147 - L149 were not covered by tests
raise PrecommitError(msg)


def __update_metadata(filename: str, metadata: dict, notebook: NotebookNode) -> None:
def _update_metadata(filename: str, metadata: dict, notebook: NotebookNode) -> None:
updated_metadata = False
jupyter_metadata = metadata.get("jupyter")
if jupyter_metadata is not None and jupyter_metadata.get("source_hidden"):
Expand Down
7 changes: 4 additions & 3 deletions src/repoma/set_nb_cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import nbformat

from repoma.utilities.notebook import load_notebook
from repoma.utilities.project_info import get_pypi_name

__CONFIG_CELL_CONTENT = """
Expand Down Expand Up @@ -132,7 +133,7 @@
) -> None:
if _skip_notebook(filename):
return
notebook = nbformat.read(filename, as_version=nbformat.NO_CONVERT)
notebook = load_notebook(filename)

Check warning on line 136 in src/repoma/set_nb_cells.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/set_nb_cells.py#L136

Added line #L136 was not covered by tests
exiting_cell = notebook["cells"][cell_id]
new_cell = nbformat.v4.new_code_cell(
new_content,
Expand All @@ -150,7 +151,7 @@
def _insert_autolink_concat(filename: str) -> None:
if _skip_notebook(filename, ignore_statement="<!-- no autolink-concat -->"):
return
notebook = nbformat.read(filename, as_version=nbformat.NO_CONVERT)
notebook = load_notebook(filename)

Check warning on line 154 in src/repoma/set_nb_cells.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/set_nb_cells.py#L154

Added line #L154 was not covered by tests
expected_cell_content = """
```{autolink-concat}
```
Expand All @@ -173,7 +174,7 @@
def _skip_notebook(
filename: str, ignore_statement: str = "<!-- no-set-nb-cells -->"
) -> bool:
notebook = nbformat.read(filename, as_version=nbformat.NO_CONVERT)
notebook = load_notebook(filename)

Check warning on line 177 in src/repoma/set_nb_cells.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/set_nb_cells.py#L177

Added line #L177 was not covered by tests
for cell in notebook["cells"]:
if cell["cell_type"] != "markdown":
continue
Expand Down
8 changes: 8 additions & 0 deletions src/repoma/utilities/notebook.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Helper tools for working with Jupyter Notebooks."""

import nbformat
from nbformat import NotebookNode


def load_notebook(path: str) -> NotebookNode:
    """Read the notebook at ``path``, passing `nbformat.NO_CONVERT` as version."""
    keep_version = nbformat.NO_CONVERT
    return nbformat.read(path, as_version=keep_version)

Check warning on line 8 in src/repoma/utilities/notebook.py

View check run for this annotation

Codecov / codecov/patch

src/repoma/utilities/notebook.py#L8

Added line #L8 was not covered by tests
Loading