Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for jupyter notebook #414

Merged
merged 3 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,14 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install coverage docopt yarg requests
pip install poetry
poetry install --with dev

- name: Calculate coverage
run: coverage run --source=pipreqs -m unittest discover
run: poetry run coverage run --source=pipreqs -m unittest discover

- name: Create XML report
run: coverage xml
run: poetry run coverage xml

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
Expand Down
9 changes: 9 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ Installation

pip install pipreqs

Note: if you don't need support for Jupyter notebooks, you can install pipreqs without the dependencies that provide it.
To do so, run:

.. code-block:: sh

pip install --no-deps pipreqs
pip install yarg==0.1.9 docopt==0.6.2

Usage
-----

Expand Down Expand Up @@ -57,6 +65,7 @@ Usage
<compat> | e.g. Flask~=1.1.2
<gt> | e.g. Flask>=1.1.2
<no-pin> | e.g. Flask
--scan-notebooks Look for imports in jupyter notebook files.

Example
-------
Expand Down
96 changes: 88 additions & 8 deletions pipreqs/pipreqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
<compat> | e.g. Flask~=1.1.2
<gt> | e.g. Flask>=1.1.2
<no-pin> | e.g. Flask
--scan-notebooks Look for imports in jupyter notebook files.
"""
from contextlib import contextmanager
import os
Expand All @@ -53,6 +54,18 @@
REGEXP = [re.compile(r"^import (.+)$"), re.compile(r"^from ((?!\.+).*?) import (?:.*)$")]


# Module-level switch for notebook scanning. init() overwrites it with the
# value of the --scan-notebooks CLI option; get_file_extensions() and
# read_file_content() read it. (The identifier is spelled with three "o"s
# everywhere it is used in this module.)
scan_noteboooks = False


class NbconvertNotInstalled(ImportError):
    """Signal that notebook scanning was requested but nbconvert is unavailable.

    Instantiated without arguments, the exception carries ``default_message``;
    a custom message may be supplied instead.
    """

    default_message = (
        "In order to scan jupyter notebooks, please install the nbconvert and ipython libraries"
    )

    def __init__(self, message=default_message):
        # Hand the text straight to ImportError so str(exc) yields it.
        ImportError.__init__(self, message)


@contextmanager
def _open(filename=None, mode="r"):
"""Open a file or ``sys.stdout`` depending on the provided filename.
Expand Down Expand Up @@ -84,31 +97,44 @@ def _open(filename=None, mode="r"):
file.close()


def get_all_imports(path, encoding=None, extra_ignore_dirs=None, follow_links=True):
def get_all_imports(path, encoding="utf-8", extra_ignore_dirs=None, follow_links=True):
imports = set()
raw_imports = set()
candidates = []
ignore_errors = False
ignore_dirs = [".hg", ".svn", ".git", ".tox", "__pycache__", "env", "venv"]
ignore_dirs = [
".hg",
".svn",
".git",
".tox",
"__pycache__",
"env",
"venv",
".ipynb_checkpoints",
]

if extra_ignore_dirs:
ignore_dirs_parsed = []
for e in extra_ignore_dirs:
ignore_dirs_parsed.append(os.path.basename(os.path.realpath(e)))
ignore_dirs.extend(ignore_dirs_parsed)

extensions = get_file_extensions()

walk = os.walk(path, followlinks=follow_links)
for root, dirs, files in walk:
dirs[:] = [d for d in dirs if d not in ignore_dirs]

candidates.append(os.path.basename(root))
alan-barzilay marked this conversation as resolved.
Show resolved Hide resolved
files = [fn for fn in files if os.path.splitext(fn)[1] == ".py"]
py_files = [file for file in files if file_ext_is_allowed(file, [".py"])]
candidates.extend([os.path.splitext(filename)[0] for filename in py_files])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Fernando-crz @mateuslatrova help me sanity-check my understanding of the `files` and `candidates` objects: `files` will be parsed and processed to get the list of imported packages, but this list will be contaminated by stdlib packages and local imports, which will be removed in other processing steps (lines 165 and 171).

The `candidates` list is simply a list of local files that could be imported (like a utils.py file) but shouldn't be part of the requirements file. Is that it? If so, isn't `candidates` an awful name? What are they candidates for? Also, this would explain why we are adding directory by directory to the list, as I raised in issue #424 and in our last meeting.

Maybe we could take this opportunity to rename those objects to something more intuitive, like local_files or local_modules, since directories can also be imported?
If you think this should be in another PR, that's OK.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe
py_files -> local_files
candidates -> local_modules
(pretty sure that's the correct nomenclature, but we could use a double check)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we just leave it for another PR? 🫥
We are not completely sure what is going on with those variables. We just copied the logic as it was before.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Fernando-crz my dude, you gotta understand what's going on and not just blindly reuse code haha
I opened issue #427 to keep track of this.


files = [fn for fn in files if file_ext_is_allowed(fn, extensions)]

candidates += [os.path.splitext(fn)[0] for fn in files]
for file_name in files:
file_name = os.path.join(root, file_name)
with open(file_name, "r", encoding=encoding) as f:
contents = f.read()
contents = read_file_content(file_name, encoding)

try:
tree = ast.parse(contents)
for node in ast.walk(tree):
Expand Down Expand Up @@ -145,6 +171,40 @@ def get_all_imports(path, encoding=None, extra_ignore_dirs=None, follow_links=Tr
return list(packages - data)


def get_file_extensions():
    """Return the list of file extensions that should be scanned for imports.

    ``.py`` is always present; ``.ipynb`` is included only when the
    module-level ``scan_noteboooks`` flag is truthy.
    """
    if scan_noteboooks:
        return [".py", ".ipynb"]
    return [".py"]


def read_file_content(file_name: str, encoding="utf-8"):
    """Return the Python source held in ``file_name``.

    Args:
        file_name (str): path of a ``.py`` file or of a ``.ipynb`` notebook.
        encoding (str): encoding used to read a ``.py`` file; it is also
            forwarded to ``ipynb_2_py`` for notebooks.

    Returns:
        The text of a ``.py`` file, or whatever ``ipynb_2_py`` returns for a
        notebook.

    Raises:
        ValueError: if the file is neither a ``.py`` file nor a ``.ipynb``
            notebook with notebook scanning enabled.
    """
    # Split the extension once instead of once per branch.
    extension = os.path.splitext(file_name)[1]

    if extension == ".py":
        with open(file_name, "r", encoding=encoding) as f:
            return f.read()

    # Notebooks are converted only when the scan_noteboooks flag is set.
    if extension == ".ipynb" and scan_noteboooks:
        return ipynb_2_py(file_name, encoding=encoding)

    # The earlier version fell through to ``return contents`` with the name
    # never bound, surfacing as an opaque UnboundLocalError.
    raise ValueError(
        f"Cannot read imports from {file_name!r}: unsupported file type "
        "or notebook scanning is disabled"
    )


def file_ext_is_allowed(file_name, acceptable):
    """Tell whether the extension of ``file_name`` is one of ``acceptable``.

    Args:
        file_name (str): file name or path to inspect.
        acceptable: container of extensions, each with its leading dot
            (e.g. ``[".py"]``).

    Returns:
        bool: True when the extension found by ``os.path.splitext`` is in
        ``acceptable``. The comparison is an exact, case-sensitive match.
    """
    _stem, extension = os.path.splitext(file_name)
    return extension in acceptable


def ipynb_2_py(file_name, encoding="utf-8"):
    """Export a jupyter notebook as Python source.

    Args:
        file_name (str): path of the notebook file to export
        encoding (str): encoding applied to the exported source

    Returns:
        The exported script after ``.encode(encoding)`` -- i.e. bytes,
        assuming the exporter yields a ``str`` body.

    """
    # from_filename yields a (body, resources) pair; only the body is used.
    source, _resources = PythonExporter().from_filename(file_name)
    return source.encode(encoding)


def generate_requirements_file(path, imports, symbol):
with _open(path, "w") as out_file:
logging.debug(
Expand Down Expand Up @@ -199,7 +259,7 @@ def get_imports_info(imports, pypi_server="https://pypi.python.org/pypi/", proxy
return result


def get_locally_installed_packages(encoding=None):
def get_locally_installed_packages(encoding="utf-8"):
packages = []
ignore = ["tests", "_tests", "egg", "EGG", "info"]
for path in sys.path:
Expand Down Expand Up @@ -240,7 +300,7 @@ def get_locally_installed_packages(encoding=None):
return packages


def get_import_local(imports, encoding=None):
def get_import_local(imports, encoding="utf-8"):
alan-barzilay marked this conversation as resolved.
Show resolved Hide resolved
local = get_locally_installed_packages()
result = []
for item in imports:
Expand Down Expand Up @@ -427,11 +487,31 @@ def dynamic_versioning(scheme, imports):
return imports, symbol


def handle_scan_noteboooks():
    """Prepare the module for notebook scanning when it has been requested.

    When the module-level ``scan_noteboooks`` flag is falsy, this only logs
    an informational message. Otherwise it imports ``PythonExporter`` from
    nbconvert and binds it as a module global so ``ipynb_2_py`` can use it.

    Raises:
        NbconvertNotInstalled: if ``nbconvert`` cannot be imported; the
            original ImportError is attached as ``__cause__``.
    """
    # Declared up front: the import below must bind the module-level name,
    # not a function local.
    global PythonExporter

    if not scan_noteboooks:
        logging.info("Not scanning for jupyter notebooks.")
        return

    try:
        from nbconvert import PythonExporter
    except ImportError as err:
        # Chain explicitly so the traceback shows which import failed.
        raise NbconvertNotInstalled() from err


def init(args):
global scan_noteboooks
encoding = args.get("--encoding")
extra_ignore_dirs = args.get("--ignore")
follow_links = not args.get("--no-follow-links")

scan_noteboooks = args.get("--scan-notebooks", False)
handle_scan_noteboooks()

input_path = args["<path>"]

if encoding is None:
encoding = "utf-8"
if input_path is None:
input_path = os.path.abspath(os.curdir)

Expand Down
Loading
Loading