Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

provisional function on imports #89

Open
wants to merge 27 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
8b8446a
-create the function that searches for imports from the notebook ast …
Felice644 May 9, 2022
5930e7b
-improved the find functions of the python core libraries and reduced…
Felice644 May 11, 2022
527c709
- Fixed the function for parsing of .txt, .toml, .yaml file
Felice644 May 19, 2022
a501026
- just a little fix on the name
Felice644 May 19, 2022
c928c12
- First attempt to correct the error on the tests
Felice644 May 20, 2022
88b1a91
- Fixed error on the tests
Felice644 May 21, 2022
c638c58
- Fixed previous comments in notebook class
Felice644 May 21, 2022
d6beae7
Minor improvements in `notebook.py`
louieQ May 22, 2022
9cbed73
- Added test and test class for notebook functions.
Felice644 May 24, 2022
14f9c86
Merge remote-tracking branch 'origin/modules/packeges_imported' into …
Felice644 May 24, 2022
39ff970
- Update project to the last versions
Felice644 May 24, 2022
4a94a5f
- Fixed failing repository tests.
Felice644 May 25, 2022
6163d3b
Integrate the latest refactoring (`core_models.py`)
louieQ May 27, 2022
69cfee8
Improve the parsing strategy for `setup.py` files
louieQ May 27, 2022
c7a9359
Repo requirements parsing fixtures
louieQ May 27, 2022
3c68e02
Refactor requirement file parsers
louieQ May 27, 2022
e3f90a8
Parser for `requirements.txt` ready and tested
louieQ May 27, 2022
4823f1f
Parser for `pyproject.toml` ready and tested
louieQ May 27, 2022
c623078
Parser for `setup.py` ready and tested
louieQ May 27, 2022
28093f3
Parser for `Pipfile` ready and tested
louieQ May 27, 2022
2138192
Handle exceptions while parsing requirement files
louieQ May 30, 2022
de8a6a1
Improve the management of magic functions
louieQ May 30, 2022
03f7185
Check the presence of undeclared dependencies
louieQ May 30, 2022
f24380f
Merge pull request #93 from collab-uniba/merge_PR_89
Felice644 May 31, 2022
4808cd3
- Added tests for dependencies_unmanaged and test_undeclared_dependen…
Felice644 Jun 1, 2022
3c569d2
- Partial solution for the test case.
Felice644 Jun 6, 2022
2f61f4a
Fixed `test_undeclared_dependencies`
louieQ Jun 10, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
533 changes: 475 additions & 58 deletions poetry.lock

Large diffs are not rendered by default.

157 changes: 154 additions & 3 deletions pynblint/core_models.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,30 @@
import ast
import copy
import os
import re
import tempfile
import zipfile
from abc import ABC
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Set

import git
import nbconvert
import nbformat
import rich
import toml
from nbformat.notebooknode import NotebookNode
from rich.abc import RichRenderable
from rich.columns import Columns
from rich.console import Console, ConsoleOptions, RenderResult
from rich.padding import Padding
from rich.panel import Panel
from rich.syntax import Syntax
from yaml import safe_load

from .config import CellRenderingMode, settings
from .exceptions import InvalidRequirementsFileError
from .rich_extensions import NotebookMarkdown


Expand All @@ -36,6 +40,7 @@ def __init__(self, path: Path):

# Extracted content
self.notebooks: List[Notebook] = [] # List of Notebook objects
self.declared_requirements: Set = self._parse_requirements()

def retrieve_notebooks(self):

Expand Down Expand Up @@ -80,6 +85,114 @@ def large_file_paths(self) -> List[Path]:
large_files.append(file_path)
return large_files

def _parse_requirements(self) -> Set:
    """Collect the requirements declared anywhere under the repository root.

    The directory tree rooted at ``self.path`` is walked and every file whose
    name matches a supported dependency-management format is handed to the
    parser dedicated to that format.

    Returns:
        Set: the union of the requirement names found in all matching files.
    """

    # File name -> name of the method in charge of parsing that format
    parser_names = {
        "requirements.txt": "_get_requirements_from_txt",
        "environment.yaml": "_get_requirements_from_yaml",
        "environment.yml": "_get_requirements_from_yaml",
        "pyproject.toml": "_get_requirements_from_toml",
        "setup.py": "_get_requirements_from_setup",
        "Pipfile": "_get_requirements_from_pipfile",
    }

    # First locate every requirement file, then parse them in discovery order
    requirement_files: List[Path] = [
        Path(dirpath) / filename
        for dirpath, _, filenames in os.walk(self.path)
        for filename in filenames
        if filename in parser_names
    ]

    collected: Set = set()
    for requirement_file in requirement_files:
        parse = getattr(self, parser_names[requirement_file.name])
        collected.update(parse(requirement_file))

    return collected

@staticmethod
def _get_requirements_from_txt(path: Path) -> set:
    """Extract the names of the packages listed in a ``requirements.txt`` file.

    Blank lines, comment lines (also when indented) and lines that do not
    start with a package name (e.g. pip options such as ``-r other.txt`` or
    bare URLs) are skipped. From each remaining line only the distribution
    name is kept: version specifiers (``==``, ``>=``, ``<=``, ``~=``, ``!=``,
    ``<``, ``>``), extras (``[...]``), environment markers (``; ...``), direct
    references (``name @ url``) and trailing inline comments are dropped.

    Args:
        path (Path): path to the ``requirements.txt`` file.

    Returns:
        set: the names of the declared requirements.
    """
    # A distribution name followed either by the end of the line or by the
    # beginning of a version specifier, extras, marker or direct reference.
    name_pattern = re.compile(r"([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?=$|[<>=!~;\[@(])")

    requirements = set()
    with open(path, "r") as fi:
        for raw_line in fi:
            # A `#` preceded by whitespace starts an inline comment
            line = re.split(r"\s#", raw_line.strip(), maxsplit=1)[0].strip()
            if not line or line.startswith("#"):
                continue
            match = name_pattern.match(line)
            if match:  # option lines and bare URLs/paths do not match
                requirements.add(match.group(1))
    return requirements

@staticmethod
def _get_requirements_from_toml(path: Path) -> set:
    """Extract the names of the packages declared in a ``pyproject.toml`` file.

    Two layouts are recognized: the Poetry table ``[tool.poetry.dependencies]``
    (whose keys are package names) and the ``dependencies`` array of the
    ``[project]`` table (whose items are requirement strings). A file that
    declares neither (e.g. one that only holds tool configuration) yields an
    empty set instead of failing.

    Args:
        path (Path): path to the ``pyproject.toml`` file.

    Returns:
        set: the names of the declared requirements.

    Raises:
        InvalidRequirementsFileError: if the file cannot be loaded as TOML.
    """
    try:
        parsed_toml = toml.load(path)
    except Exception as err:
        raise InvalidRequirementsFileError(
            "Project requirements could not be parsed in `pyproject.toml`: "
            "invalid toml syntax."
        ) from err

    requirements = set()

    # Poetry layout: a table keyed by package name
    poetry_section = parsed_toml.get("tool", {}).get("poetry", {})
    requirements.update(poetry_section.get("dependencies", {}))

    # `[project]` layout: a list of requirement strings such as "numpy>=1.20"
    for dependency in parsed_toml.get("project", {}).get("dependencies", []):
        if not isinstance(dependency, str):
            continue
        name = re.match(r"[A-Za-z0-9][A-Za-z0-9._-]*", dependency.strip())
        if name:
            requirements.add(name.group(0))

    return requirements

@staticmethod
def _get_requirements_from_yaml(path: Path) -> set:
    """Extract the names of the packages declared in a conda environment file.

    Both the plain string entries of the ``dependencies`` list and the entries
    of its nested ``pip`` list are considered. For each entry only the package
    name is kept: an optional ``channel::`` prefix and anything following the
    name (version constraints, extras, markers) are dropped. An empty file or
    a file without a ``dependencies`` list yields an empty set.

    Args:
        path (Path): path to the ``environment.yml`` / ``environment.yaml`` file.

    Returns:
        set: the names of the declared requirements.

    Raises:
        InvalidRequirementsFileError: if the file cannot be loaded as YAML.
    """
    with open(path, "r") as fi:
        try:
            parsed_yaml = safe_load(fi.read())
        except Exception as err:
            raise InvalidRequirementsFileError(
                "Project requirements could not be parsed from `environment.yml`: "
                "invalid yaml syntax."
            ) from err

    # An empty document is loaded as ``None``; `dependencies` may be missing
    dependencies = None
    if isinstance(parsed_yaml, dict):
        dependencies = parsed_yaml.get("dependencies")

    raw_deps = []
    for item in dependencies or []:
        if isinstance(item, str):
            raw_deps.append(item)
        elif isinstance(item, dict):
            # Nested section, e.g. `- pip: [...]`
            pip_dependencies = item.get("pip") or []
            raw_deps.extend(dep for dep in pip_dependencies if isinstance(dep, str))

    requirements = set()
    for req in raw_deps:
        # Drop the optional channel prefix (`conda-forge::numpy`)
        spec = req.split("::")[-1].strip()
        # Keep what precedes the first version operator, space, extra or marker
        name = re.split(r"[\s<>=!~;\[]", spec, maxsplit=1)[0]
        # Discard pip options (`-r ...`) and anything that is not a plain name
        if re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._-]*", name):
            requirements.add(name)
    return requirements

@staticmethod
def _get_requirements_from_pipfile(path: Path) -> set:
    """Extract the names of the packages declared in a ``Pipfile``.

    Only the ``[packages]`` table is considered. A ``Pipfile`` without that
    table yields an empty set instead of failing.

    Args:
        path (Path): path to the ``Pipfile``.

    Returns:
        set: the names of the declared requirements.

    Raises:
        InvalidRequirementsFileError: if the file cannot be loaded as TOML.
    """
    try:
        parsed_pipfile = toml.load(path)
    except Exception as err:
        raise InvalidRequirementsFileError(
            "Project requirements could not be parsed from `Pipfile`: "
            "invalid toml syntax."
        ) from err

    # The table is keyed by package name; it may legitimately be absent
    return set(parsed_pipfile.get("packages", {}))

@staticmethod
def _get_requirements_from_setup(path: Path) -> set:
    """Extract the names of the packages declared in a ``setup.py`` file.

    The file is parsed (never executed) and the ``install_requires`` argument
    of every call to ``setup`` -- invoked either as ``setup(...)`` or as an
    attribute, e.g. ``setuptools.setup(...)`` -- is statically evaluated.
    Arguments that are not literals (e.g. a variable holding the list) cannot
    be resolved without running the script and are therefore ignored.

    Args:
        path (Path): path to the ``setup.py`` file.

    Returns:
        set: the names of the declared requirements.

    Raises:
        InvalidRequirementsFileError: if the file is not valid Python.
    """
    with open(path, "r") as fi:
        try:
            parsed_setup_file = ast.parse(fi.read())
        except Exception as err:
            raise InvalidRequirementsFileError(
                "Project requirements could not be parsed from `setup.py`: "
                "invalid Python syntax."
            ) from err

    # A distribution name followed either by the end of the string or by the
    # beginning of a version specifier, extras, marker or direct reference.
    name_pattern = re.compile(r"([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?=$|[<>=!~;\[@(])")

    requirements = set()
    for node in ast.walk(parsed_setup_file):
        if not isinstance(node, ast.Call):
            continue

        # The callee is a bare name (`setup`) or an attribute (`x.setup`);
        # any other kind of callee (e.g. the result of a call) is irrelevant.
        callee = node.func
        if isinstance(callee, ast.Name):
            callee_name = callee.id
        elif isinstance(callee, ast.Attribute):
            callee_name = callee.attr
        else:
            continue
        if callee_name != "setup":
            continue

        for keyword in node.keywords:
            if keyword.arg != "install_requires":
                continue
            try:
                raw_requirements = ast.literal_eval(keyword.value)
            except (ValueError, TypeError):
                # Not a literal: cannot be resolved statically
                continue
            if isinstance(raw_requirements, str):
                raw_requirements = raw_requirements.splitlines()
            elif not isinstance(raw_requirements, (list, tuple, set)):
                continue
            for req in raw_requirements:
                if not isinstance(req, str):
                    continue
                match = name_pattern.match(req.strip())
                if match:
                    requirements.add(match.group(1))
    return requirements


class LocalRepository(Repository):
"""
Expand Down Expand Up @@ -127,7 +240,6 @@ class GitHubRepository(Repository):
"""

def __init__(self, github_url: str):

self.url = github_url

# Clone the repo in a temp directory
Expand Down Expand Up @@ -284,8 +396,13 @@ def __init__(self, path: Path, repository: Optional[Repository] = None):
self.non_executed = all([cell.non_executed for cell in self.code_cells])

# Convert the notebook to a Python script
nb_dict_no_magic = copy.deepcopy(self.nb_dict)
for cell in nb_dict_no_magic.cells:
cell.source = "\n".join(
[line for line in cell.source.splitlines() if not line.startswith("%")]
)
python_exporter = nbconvert.PythonExporter()
self.script, _ = python_exporter.from_notebook_node(self.nb_dict)
self.script, _ = python_exporter.from_notebook_node(nb_dict_no_magic)

# Extract the Python abstract syntax tree
# (or set `has_invalid_python_syntax` to True)
Expand All @@ -295,6 +412,10 @@ def __init__(self, path: Path, repository: Optional[Repository] = None):
except SyntaxError:
self.has_invalid_python_syntax = True

# Get the set of imported Python packages
if not self.has_invalid_python_syntax:
self.imported_packages: Set = self._get_imported_packages()

@property
def code_cells(self) -> List[Cell]:
code_cells = [cell for cell in self.cells if cell.cell_type == CellType.CODE]
Expand All @@ -313,6 +434,36 @@ def initial_cells(self) -> List[Cell]:
def final_cells(self) -> List[Cell]:
return self.cells[-settings.final_cells :] # noqa: E203

def _get_imported_packages(self) -> Set:
    """Return the top-level packages and modules imported by the notebook.

    Since the lookup is performed on the abstract syntax tree of the notebook,
    it is only possible for notebooks with a valid Python syntax: invoking this
    function on a notebook containing syntactic Python errors raises a
    ``ValueError`` exception.

    Returns:
        Set: the set of packages and modules imported in the notebook.
    """

    if self.has_invalid_python_syntax:
        raise ValueError(
            "Imported packages cannot be parsed in notebooks with invalid "
            "Python syntax."
        )

    top_level_names: Set = set()
    for statement in ast.walk(self.ast):
        if isinstance(statement, ast.Import):
            # `import a.b, c as d` -> {"a", "c"}
            top_level_names.update(
                alias.name.split(".")[0] for alias in statement.names
            )
            continue

        if not isinstance(statement, ast.ImportFrom):
            continue

        # Relative imports always refer to the current package: skip them,
        # as well as statements that do not name a module.
        is_relative = statement.level > 0
        if is_relative or not statement.module:
            continue
        top_level_names.add(statement.module.split(".")[0])

    return top_level_names

def __len__(self) -> int:
return len(self.cells)

Expand Down
6 changes: 6 additions & 0 deletions pynblint/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,9 @@ class ExportFormatNotSupportedError(ValueError):
def __init__(self, message) -> None:
self.message = message
super().__init__(self.message)


class InvalidRequirementsFileError(SyntaxError):
    """Error signalling that a requirements file could not be parsed."""

    def __init__(self, message) -> None:
        super().__init__(message)
        # Keep the description available as a plain attribute too
        self.message = message
68 changes: 67 additions & 1 deletion pynblint/nb_linting.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
"""Linting functions for notebooks."""
import ast
import re
import sys
from typing import List, Pattern

from isort import stdlibs

from . import lint_register as register
from .config import settings
from .core_models import Cell, CellType, Notebook
Expand Down Expand Up @@ -230,6 +233,59 @@ def non_executed_notebook(notebook: Notebook) -> bool:
return notebook.non_executed


def _core_python_modules() -> set:
    """Return the names of the standard library modules of the running Python.

    The version-specific list shipped with ``isort`` is preferred; when the
    installed ``isort`` does not know the running Python version, the list
    exposed by the interpreter itself (``sys.stdlib_module_names``) is used.

    Raises:
        Exception: if no list is available for the running Python version.
    """
    major, minor = sys.version_info.major, sys.version_info.minor

    # isort exposes one submodule per Python version, e.g. `stdlibs.py39`
    isort_listing = getattr(stdlibs, f"py{major}{minor}", None)
    if isort_listing is not None:
        return set(isort_listing.stdlib)

    interpreter_listing = getattr(sys, "stdlib_module_names", None)
    if interpreter_listing is not None:
        return set(interpreter_listing)

    raise Exception(
        "Python version not supported: the list of standard library modules "
        f"is not available for Python {major}.{minor}."
    )


def undeclared_dependencies(notebook: Notebook) -> bool:
    """Check if the dependencies of the notebook are properly declared.

    This check is performed only when notebooks belong to a repository and
    have a valid Python syntax.

    Args:
        notebook (Notebook): the notebook to be analyzed

    Returns:
        bool: ``True`` if the notebook contains import statements referred to
        packages that are not part of the core Python libraries and are not declared
        in a dependency management file: ``requirements.txt``, ``setup.py``,
        ``environment.yml``, ``pyproject.toml``, or ``Pipfile``; ``False`` otherwise.

    Raises:
        Exception: if the standard library modules of the running Python version
        cannot be determined.
    """

    # Nothing to check for standalone notebooks or when imports are unavailable
    if not notebook.repository or notebook.has_invalid_python_syntax:
        return False

    # Modules and packages imported in the notebook, excluding core ones
    external_dependencies = notebook.imported_packages - _core_python_modules()

    # Modules and packages that are not declared in dependency management files
    undeclared = external_dependencies - notebook.repository.declared_requirements

    return bool(undeclared)


# ========== #
# CELL LEVEL #
# ========== #
Expand Down Expand Up @@ -385,6 +441,16 @@ def long_multiline_python_comment(notebook: Notebook) -> List[Cell]:
"that all cells are executed.",
linting_function=non_executed_notebook,
),
LintDefinition(
slug="undeclared-dependencies",
description="The notebook has external dependencies that are not declared "
"in a dependency management file (e.g., `requirements.txt`, `setup.py`, "
"`environment.yml`, `pyproject.toml`, or `Pipfile`)",
recommendation="Use a dependency management tool (e.g., `pip`, `conda`, or "
"`Poetry`) to declare your dependencies or to refresh the set "
"of declared dependencies.",
linting_function=undeclared_dependencies,
),
]

cell_level_lints: List[LintDefinition] = [
Expand All @@ -403,7 +469,7 @@ def long_multiline_python_comment(notebook: Notebook) -> List[Cell]:
show_details=False,
),
LintDefinition(
slug="long_multiline_python_comment",
slug="long-multiline-python-comment",
description="One or more code cells in this notebook contain Python comments "
f"of {settings.max_multiline_python_comment} or more consecutive lines.",
recommendation="For improved notebook readability, prefer using Markdown "
Expand Down
13 changes: 13 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ rich = "^11.1.0"
typer = "^0.4.0"
pydantic = {extras = ["dotenv"], version = "^1.9.0"}
ipython = "<8"
isort = "^5.10.1"
types-toml = "^0.10.7"
yml = "^0.0.1"
types-PyYAML = "^6.0.7"
toml = "^0.10.2"
poetry-types = "^0.2.2"

[tool.poetry.dev-dependencies]
jupyterlab = "^3.2.8"
Expand Down Expand Up @@ -57,3 +63,10 @@ max-line-length = 88

[tool.bandit]
exclude_dirs = ["tests"]

[[tool.mypy.overrides]]
module = [
"toml",
"yaml"
]
ignore_missing_imports = true
Empty file added tests/__init__.py
Empty file.
Loading