Skip to content

Commit

Permalink
Add support for jupyter notebooks
Browse files Browse the repository at this point in the history
Credits to @pakio and @mateuslatrova for the contributions
  • Loading branch information
Fernando-crz authored and alan-barzilay committed Dec 5, 2023
1 parent 03c9248 commit b50b4a7
Show file tree
Hide file tree
Showing 12 changed files with 1,397 additions and 12 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,14 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install coverage docopt yarg requests
pip install poetry
poetry install --with dev
- name: Calculate coverage
run: coverage run --source=pipreqs -m unittest discover
run: poetry run coverage run --source=pipreqs -m unittest discover

- name: Create XML report
run: coverage xml
run: poetry run coverage xml

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
Expand Down
9 changes: 9 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ Installation
pip install pipreqs
Note: if you don't need support for Jupyter notebooks, you can install pipreqs without the dependencies that provide it.
To do so, run:

.. code-block:: sh
pip install --no-deps pipreqs
pip install yarg==0.1.9 docopt==0.6.2
Usage
-----

Expand Down Expand Up @@ -57,6 +65,7 @@ Usage
<compat> | e.g. Flask~=1.1.2
<gt> | e.g. Flask>=1.1.2
<no-pin> | e.g. Flask
--scan-notebooks Look for imports in jupyter notebook files.

Example
-------
Expand Down
87 changes: 82 additions & 5 deletions pipreqs/pipreqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
<compat> | e.g. Flask~=1.1.2
<gt> | e.g. Flask>=1.1.2
<no-pin> | e.g. Flask
--scan-notebooks Look for imports in jupyter notebook files.
"""
from contextlib import contextmanager
import os
Expand All @@ -53,6 +54,18 @@
# Line-level patterns for import statements: a plain ``import x`` line, and a
# ``from x import y`` line whose module part does not start with a dot
# (the negative lookahead rejects relative imports).
REGEXP = [re.compile(r"^import (.+)$"), re.compile(r"^from ((?!\.+).*?) import (?:.*)$")]


# Module-level switch for notebook scanning; ``init`` sets it from the
# ``--scan-notebooks`` command-line option.
scan_noteboooks = False


class NbconvertNotInstalled(ImportError):
    """Error raised when notebook scanning is requested but nbconvert cannot be imported."""

    # Message used when the caller does not supply one.
    default_message = (
        "In order to scan jupyter notebooks, please install the nbconvert and ipython libraries"
    )

    def __init__(self, message=default_message):
        ImportError.__init__(self, message)


@contextmanager
def _open(filename=None, mode="r"):
"""Open a file or ``sys.stdout`` depending on the provided filename.
Expand Down Expand Up @@ -89,26 +102,39 @@ def get_all_imports(path, encoding="utf-8", extra_ignore_dirs=None, follow_links
raw_imports = set()
candidates = []
ignore_errors = False
ignore_dirs = [".hg", ".svn", ".git", ".tox", "__pycache__", "env", "venv"]
ignore_dirs = [
".hg",
".svn",
".git",
".tox",
"__pycache__",
"env",
"venv",
".ipynb_checkpoints",
]

if extra_ignore_dirs:
ignore_dirs_parsed = []
for e in extra_ignore_dirs:
ignore_dirs_parsed.append(os.path.basename(os.path.realpath(e)))
ignore_dirs.extend(ignore_dirs_parsed)

extensions = get_file_extensions()

walk = os.walk(path, followlinks=follow_links)
for root, dirs, files in walk:
dirs[:] = [d for d in dirs if d not in ignore_dirs]

candidates.append(os.path.basename(root))
files = [fn for fn in files if os.path.splitext(fn)[1] == ".py"]
py_files = [file for file in files if file_ext_is_allowed(file, [".py"])]
candidates.extend([os.path.splitext(filename)[0] for filename in py_files])

files = [fn for fn in files if file_ext_is_allowed(fn, extensions)]

candidates += [os.path.splitext(fn)[0] for fn in files]
for file_name in files:
file_name = os.path.join(root, file_name)
with open(file_name, "r", encoding=encoding) as f:
contents = f.read()
contents = read_file_content(file_name, encoding)

try:
tree = ast.parse(contents)
for node in ast.walk(tree):
Expand Down Expand Up @@ -145,6 +171,40 @@ def get_all_imports(path, encoding="utf-8", extra_ignore_dirs=None, follow_links
return list(packages - data)


def get_file_extensions():
    """Return the list of file extensions to scan.

    ``.py`` is always included; ``.ipynb`` is added only while the module-level
    ``scan_noteboooks`` flag is truthy.
    """
    if scan_noteboooks:
        return [".py", ".ipynb"]
    return [".py"]


def read_file_content(file_name: str, encoding="utf-8"):
    """Return the Python source held in ``file_name``.

    Args:
        file_name (str): path of a ``.py`` file or, while notebook scanning is
            enabled, of a ``.ipynb`` file.
        encoding (str): encoding used to read a ``.py`` file; forwarded to
            ``ipynb_2_py`` for notebooks.

    Returns:
        The text of a ``.py`` file, or whatever ``ipynb_2_py`` returns for a
        notebook.

    Raises:
        ValueError: if the file is neither a ``.py`` file nor a notebook that
            may currently be scanned.
    """
    if file_ext_is_allowed(file_name, [".py"]):
        with open(file_name, "r", encoding=encoding) as f:
            return f.read()

    if file_ext_is_allowed(file_name, [".ipynb"]) and scan_noteboooks:
        return ipynb_2_py(file_name, encoding=encoding)

    # Fail with an explicit error rather than falling through with no content
    # bound, which would surface as an opaque UnboundLocalError.
    raise ValueError("Cannot read imports from unsupported file: %s" % file_name)


def file_ext_is_allowed(file_name, acceptable):
    """Tell whether the extension of ``file_name`` is one of ``acceptable``.

    Args:
        file_name (str): file name or path; its extension is taken with
            ``os.path.splitext``.
        acceptable: container of extensions including the leading dot,
            e.g. ``[".py"]``.

    Returns:
        bool: result of the membership test on the extracted extension.
    """
    _, extension = os.path.splitext(file_name)
    return extension in acceptable


def ipynb_2_py(file_name, encoding="utf-8"):
    """Export a notebook as a Python script using ``PythonExporter``.

    Args:
        file_name (str): notebook file path to parse as python script
        encoding (str): encoding applied to the exported script

    Returns:
        The exported script after ``.encode(encoding)`` (bytes, assuming the
        exporter yields a ``str`` body).
    """
    # The exporter returns a pair; only the script body is needed here.
    source, _resources = PythonExporter().from_filename(file_name)
    return source.encode(encoding)


def generate_requirements_file(path, imports, symbol):
with _open(path, "w") as out_file:
logging.debug(
Expand Down Expand Up @@ -427,10 +487,27 @@ def dynamic_versioning(scheme, imports):
return imports, symbol


def handle_scan_noteboooks():
    """Prepare notebook scanning when the ``scan_noteboooks`` flag is set.

    When the flag is falsy this only logs an informational message. Otherwise
    nbconvert's ``PythonExporter`` is imported and bound as a module-level
    name so other functions in this module can use it.

    Raises:
        NbconvertNotInstalled: if ``nbconvert`` cannot be imported.
    """
    # Declared up front: the import below must bind the module-level name.
    global PythonExporter

    if not scan_noteboooks:
        logging.info("Not scanning for jupyter notebooks.")
        return

    try:
        from nbconvert import PythonExporter
    except ImportError as err:
        # Chain explicitly so the traceback reports the failed import as the cause.
        raise NbconvertNotInstalled() from err


def init(args):
global scan_noteboooks
encoding = args.get("--encoding")
extra_ignore_dirs = args.get("--ignore")
follow_links = not args.get("--no-follow-links")

scan_noteboooks = args.get("--scan-notebooks", False)
handle_scan_noteboooks()

input_path = args["<path>"]

if encoding is None:
Expand Down
Loading

0 comments on commit b50b4a7

Please sign in to comment.