bndr · Fernando-crz · Jul 2, 2020 · Oct 17, 2023 · Oct 22, 2023 · Oct 23, 2023
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -39,7 +39,8 @@ jobs:
             - name: Install dependencies
               run: |
                   python -m pip install --upgrade pip
-                  pip install coverage docopt yarg requests
+                  pip install coverage docopt yarg requests nbconvert
+                  pip install .[jupyter]
 
             - name: Calculate coverage
               run: coverage run --source=pipreqs -m unittest discover

diff --git a/pipreqs/pipreqs.py b/pipreqs/pipreqs.py
@@ -35,6 +35,7 @@
                           <compat> | e.g. Flask~=1.1.2
                           <gt>     | e.g. Flask>=1.1.2
                           <no-pin> | e.g. Flask
+    --ignore-notebooks    Ignore jupyter notebook files.
 """
 from contextlib import contextmanager
 import os
@@ -48,9 +49,19 @@
 from yarg import json2package
 from yarg.exceptions import HTTPError
 
+try:
+    PythonExporter = None
+    ignore_notebooks = False
+    from nbconvert import PythonExporter
+except ImportError:
+    pass
+
 from pipreqs import __version__
 
-REGEXP = [re.compile(r"^import (.+)$"), re.compile(r"^from ((?!\.+).*?) import (?:.*)$")]
+REGEXP = [
+    re.compile(r"^import (.+)$"),
+    re.compile(r"^from ((?!\.+).*?) import (?:.*)$"),
+]
 
 
 @contextmanager
@@ -84,12 +95,21 @@ def _open(filename=None, mode="r"):
             file.close()
 
 
-def get_all_imports(path, encoding=None, extra_ignore_dirs=None, follow_links=True):
+def get_all_imports(path, encoding="utf-8", extra_ignore_dirs=None, follow_links=True):
     imports = set()
     raw_imports = set()
     candidates = []
     ignore_errors = False
-    ignore_dirs = [".hg", ".svn", ".git", ".tox", "__pycache__", "env", "venv"]
+    ignore_dirs = [
+        ".hg",
+        ".svn",
+        ".git",
+        ".tox",
+        "__pycache__",
+        "env",
+        "venv",
+        ".ipynb_checkpoints",
+    ]
 
     if extra_ignore_dirs:
         ignore_dirs_parsed = []
@@ -102,13 +122,26 @@ def get_all_imports(path, encoding=None, extra_ignore_dirs=None, follow_links=Tr
         dirs[:] = [d for d in dirs if d not in ignore_dirs]
 
         candidates.append(os.path.basename(root))
-        files = [fn for fn in files if os.path.splitext(fn)[1] == ".py"]
+        if notebooks_are_enabled():
+            files = [fn for fn in files if file_ext_is_allowed(fn, [".py", ".ipynb"])]
+        else:
+            files = [fn for fn in files if file_ext_is_allowed(fn, [".py"])]
+
+        candidates = list(
+            map(
+                lambda fn: os.path.splitext(fn)[0],
+                filter(lambda fn: file_ext_is_allowed(fn, [".py"]), files),
+            )
+        )
 
-        candidates += [os.path.splitext(fn)[0] for fn in files]
         for file_name in files:
             file_name = os.path.join(root, file_name)
-            with open(file_name, "r", encoding=encoding) as f:
-                contents = f.read()
+            contents = ""
+            if file_ext_is_allowed(file_name, [".py"]):
+                with open(file_name, "r", encoding=encoding) as f:
+                    contents = f.read()
+            elif file_ext_is_allowed(file_name, [".ipynb"]) and notebooks_are_enabled():
+                contents = ipynb_2_py(file_name, encoding=encoding)
             try:
                 tree = ast.parse(contents)
                 for node in ast.walk(tree):
@@ -145,11 +178,39 @@ def get_all_imports(path, encoding=None, extra_ignore_dirs=None, follow_links=Tr
     return list(packages - data)
 
-
+
+def get_file_extensions():
+    """
+    Return the file extensions selected by the current notebook settings.
+
+    Returns:
+        list: [".py", ".ipynb"] when PythonExporter is set (nbconvert was
+            imported) and ignore_notebooks is falsy, otherwise [".py"]
+
+    """
+    return [".py", ".ipynb"] if PythonExporter and not ignore_notebooks else [".py"]
+
 
+def notebooks_are_enabled():
+    """
+    Tell whether notebook files should be processed.
+
+    Returns:
+        None when PythonExporter is None (the nbconvert import failed),
+        otherwise a bool that is True unless ignore_notebooks is truthy.
+        Callers should rely on truthiness only.
+
+    """
+    return PythonExporter and not ignore_notebooks
+
+
+def file_ext_is_allowed(file_name, acceptable):
+    """
+    Check a file's extension against a collection of accepted extensions.
+
+    Args:
+        file_name (str): file name or path to check
+        acceptable: extensions to accept, each including the leading dot
+            (e.g. [".py", ".ipynb"]); the comparison is exact
+
+    Returns:
+        bool: True if the extension from os.path.splitext is in acceptable
+
+    """
+    return os.path.splitext(file_name)[1] in acceptable
+
+
+def ipynb_2_py(file_name, encoding="utf-8"):
+    """
+    Export a notebook to Python source using nbconvert's PythonExporter.
+
+    Args:
+        file_name (str): notebook file path to parse as python script
+        encoding  (str): encoding applied to the exported source
+
+    Returns:
+        bytes: exported Python source, encoded with ``encoding``
+
+    """
+
+    exporter = PythonExporter()
+    # from_filename returns a pair; only the exported source is used here.
+    (body, _) = exporter.from_filename(file_name)
+
+    return body.encode(encoding)
+
+
 def generate_requirements_file(path, imports, symbol):
     with _open(path, "w") as out_file:
         logging.debug(
             "Writing {num} requirements: {imports} to {file}".format(
-                num=len(imports), file=path, imports=", ".join([x["name"] for x in imports])
+                num=len(imports),
+                file=path,
+                imports=", ".join([x["name"] for x in imports]),
             )
         )
         fmt = "{name}" + symbol + "{version}"
@@ -199,7 +260,7 @@ def get_imports_info(imports, pypi_server="https://pypi.python.org/pypi/", proxy
     return result
 
 
-def get_locally_installed_packages(encoding=None):
+def get_locally_installed_packages(encoding="utf-8"):
     packages = []
     ignore = ["tests", "_tests", "egg", "EGG", "info"]
     for path in sys.path:
@@ -240,7 +301,7 @@ def get_locally_installed_packages(encoding=None):
     return packages
 
 
-def get_import_local(imports, encoding=None):
+def get_import_local(imports, encoding="utf-8"):
     local = get_locally_installed_packages()
     result = []
     for item in imports:
@@ -428,25 +489,23 @@ def dynamic_versioning(scheme, imports):
 
 
 def init(args):
+    global ignore_notebooks
     encoding = args.get("--encoding")
     extra_ignore_dirs = args.get("--ignore")
     follow_links = not args.get("--no-follow-links")
+    ignore_notebooks = args.get("--ignore-notebooks")
     input_path = args["<path>"]
+
+    if encoding is None:
+        encoding = "utf-8"
     if input_path is None:
         input_path = os.path.abspath(os.curdir)
 
     if extra_ignore_dirs:
         extra_ignore_dirs = extra_ignore_dirs.split(",")
 
-    path = (
-        args["--savepath"] if args["--savepath"] else os.path.join(input_path, "requirements.txt")
-    )
-    if (
-        not args["--print"]
-        and not args["--savepath"]
-        and not args["--force"]
-        and os.path.exists(path)
-    ):
+    path = args["--savepath"] if args["--savepath"] else os.path.join(input_path, "requirements.txt")
+    if not args["--print"] and not args["--savepath"] and not args["--force"] and os.path.exists(path):
         logging.warning("requirements.txt already exists, " "use --force to overwrite it")
         return
 
@@ -477,17 +536,14 @@ def init(args):
         # the list of exported modules, installed locally
         # and the package name is not in the list of local module names
         # it add to difference
-        difference = [
-            x
-            for x in candidates
-            if
-            # aggregate all export lists into one
-            # flatten the list
-            # check if candidate is in exports
-            x.lower() not in [y for x in local for y in x["exports"]] and
-            # check if candidate is package names
-            x.lower() not in [x["name"] for x in local]
-        ]
+        difference = [x for x in candidates if
+                      # aggregate all export lists into one
+                      # flatten the list
+                      # check if candidate is in exports
+                      x.lower() not in [y for x in local for y in x['exports']]
+                      and
+                      # check if candidate is package names
+                      x.lower() not in [x['name'] for x in local]]
 
         imports = local + get_imports_info(difference, proxy=proxy, pypi_server=pypi_server)
     # sort imports based on lowercase name of package, similar to `pip freeze`.

diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,5 @@
 wheel==0.38.1
 Yarg==0.1.9
 docopt==0.6.2
+nbconvert==7.9.2
+
diff --git a/setup.py b/setup.py
@@ -18,6 +18,10 @@
     'docopt', 'yarg'
 ]
 
+jupyter_requirements = [
+    'nbconvert', 'ipython'
+]
+
 setup(
     name='pipreqs',
     version=__version__,
@@ -34,6 +38,7 @@
     include_package_data=True,
     package_data={'': ['stdlib', 'mapping']},
     install_requires=requirements,
+    extras_require={"jupyter": jupyter_requirements},
     license='Apache License',
     zip_safe=False,
     keywords='pip requirements imports',

diff --git a/tests/_data_notebook/magic_commands.ipynb b/tests/_data_notebook/magic_commands.ipynb
@@ -0,0 +1,65 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Magic test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%automagic true"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ls -la\n",
+    "logstate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ls -la"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%automagic false"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ls -la"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/tests/_data_notebook/markdown_test.ipynb b/tests/_data_notebook/markdown_test.ipynb
@@ -0,0 +1,37 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Markdown test\n",
+    "import sklearn\n",
+    "\n",
+    "```python\n",
+    "import FastAPI\n",
+    "```"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tests/_data_notebook/models.py b/tests/_data_notebook/models.py