diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml
index 1439ba1..897caaa 100644
--- a/.github/workflows/python-lint.yml
+++ b/.github/workflows/python-lint.yml
@@ -1,23 +1,29 @@
+name: pre-commit
+on: [pull_request]
-name: python-lint
-on: [push, pull_request]
 jobs:
-  isort:
+  pre-commit:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
-      - uses: actions/setup-python@v2
-        with:
-          python-version: "3.8"
-      - uses: jamescurtin/isort-action@master
-        with:
-          configuration: "--check --verbose --diff --color --py=38 --profile=black"
-  black:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - uses: psf/black@stable
-        with:
-          options: "--check --verbose --diff --color --target-version=py38"
-          version: "22.8.0"
-          src: "."
+
+      - uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+          cache: 'pip'
+
+      - id: cache-pre-commit-envs
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pre-commit
+          key: ${{ runner.os }}-pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}
+
+      - id: install-requirements
+        run: pip install -e .[dev]
+
+      - name: pre-commit
+        id: pre-commit
+        run: |
+          set -o pipefail
+          pre-commit run --all-files
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 0c23870..8510d41 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -13,10 +13,10 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: '3.x'
diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml
index 6ae99b5..683faaa 100644
--- a/.github/workflows/python-test.yml
+++ b/.github/workflows/python-test.yml
@@ -16,13 +16,19 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest] # [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.10"]
+        python-version: [
+          "3.8",
+          "3.9",
+          "3.10",
+          "3.11",
+          "3.12",
+        ]
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
 
@@ -34,7 +40,7 @@ jobs:
 
       - name: Test with pytest
         run: |
-          python3 -m pytest --cov=pydependance tests/
+          python3 -m pytest --cov=pydependence tests/
 
       - uses: codecov/codecov-action@v1
         with:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ab8de4d..0673988 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,20 +1,34 @@
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.3.0
+    rev: v4.4.0
     hooks:
       - id: check-added-large-files
         args: ["--maxkb=500"]
+      - id: check-case-conflict
+      - id: detect-private-key
+
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v3.15.2
+    hooks:
+      - id: pyupgrade
+        args:
+          - "--py38-plus"
 
   - repo: https://github.com/PyCQA/isort
-    rev: 5.10.1
+    rev: 5.13.2
     hooks:
       - id: isort
         name: isort
-        args: ["--py=38", "--profile=black"]
+        args: ["--verbose", "--py=39", "--profile=black"]
 
   - repo: https://github.com/psf/black
-    rev: 22.8.0
+    rev: 24.4.0
     hooks:
       - id: black
-        args: ["--target-version=py38"]
+        args: ["--verbose", "--target-version=py39"]
+
+  # - repo: https://github.com/nmichlo/pydependence
+  #   # rev: v0.2.0
+  #   hooks:
+  #     - id: pydependence
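Note on the new CI cache step: the pre-commit environments are keyed on a hash of `.pre-commit-config.yaml`, so bumping any hook `rev` above invalidates the cache and forces a rebuild. A minimal local sketch of that idea (illustrative only — GitHub's `hashFiles` expression hashes files with its own scheme, so this is an approximation, not its exact algorithm):

```python
import hashlib
from pathlib import Path

def config_cache_key(os_name: str = "Linux") -> str:
    # Mirrors the workflow key:
    #   ${{ runner.os }}-pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}
    # Any edit to the config file changes the digest, and thus the cache key.
    digest = hashlib.sha256(Path(".pre-commit-config.yaml").read_bytes()).hexdigest()
    return f"{os_name}-pre-commit-{digest}"

print(config_cache_key())
```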
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
new file mode 100644
index 0000000..239a12d
--- /dev/null
+++ b/.pre-commit-hooks.yaml
@@ -0,0 +1,15 @@
+- id: pydependence
+  name: PyDependence
+  description: Generate dependencies for your project using the python AST.
+  language: python
+  always_run: true
+  verbose: true
+  entry: python3 -m pydependence.__main__
+  files: ^pyproject\.toml$
+  language_version: "3.9"
+  additional_dependencies:
+    - 'pydantic==2.*'
+    - 'packaging'
+    - 'networkx'
+    - 'stdlib_list'
+    - 'tomlkit'
diff --git a/LICENSE b/LICENSE
index 0f9d34e..7529b23 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2022 Nathan
+Copyright (c) 2023 Nathan Juraj Michlo
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index a7da3fc..0000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,2 +0,0 @@
-include requirements.txt
-include LICENSE
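With `files: ^pyproject\.toml$` and `entry: python3 -m pydependence.__main__`, pre-commit runs the module with the matched file appended as a command-line argument — the CLI later in this diff reads it via `sys.argv`. A rough sketch of the equivalent manual invocation, assuming `pydependence` and its `additional_dependencies` are installed and a `pyproject.toml` exists in the working directory:

```python
import subprocess
import sys

# pre-commit appends matched filenames to the hook's entry command;
# here we reproduce that call by hand (the path is illustrative).
subprocess.run(
    [sys.executable, "-m", "pydependence.__main__", "pyproject.toml"],
    check=True,
)
```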

diff --git a/README.md b/README.md
index 81d47fa..bef6f18 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 
-🕵️ pydependance 🐍
+🕵️ pydependence 🐍
 
 Python local package dependency discovery and resolution
 
@@ -8,25 +8,25 @@
 [badge row: license and pypi version badges — surrounding image/link markup lost in extraction]
-Contributions are welcome!
+Contributions are welcome!
@@ -69,15 +69,15 @@ This project was created for multiple reasons
 
 ## Install
 
-`pydependance` currently requires `python==3.10`, however,
+`pydependence` currently requires `python==3.10`, however,
 it can still be run in a virtual environment over legacy python code
 
 ```bash
-pip install pydependance
+pip install pydependence
 ```
 
 ## Usage
 
 ```bash
-python -m pydependance --help
+python -m pydependence --help
 ```
diff --git a/pydependance/__main__.py b/pydependance/__main__.py
deleted file mode 100644
index aa44e35..0000000
--- a/pydependance/__main__.py
+++ /dev/null
@@ -1,496 +0,0 @@
-# ============================================================================== #
-# MIT License                                                                    #
-#                                                                                #
-# Copyright (c) 2022 Nathan                                                      #
-#                                                                                #
-# Permission is hereby granted, free of charge, to any person obtaining a copy   #
-# of this software and associated documentation files (the "Software"), to deal  #
-# in the Software without restriction, including without limitation the rights   #
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      #
-# copies of the Software, and to permit persons to whom the Software is          #
-# furnished to do so, subject to the following conditions:                       #
-#                                                                                #
-# The above copyright notice and this permission notice shall be included in all #
-# copies or substantial portions of the Software.                                #
-#                                                                                #
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     #
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       #
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    #
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         #
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  #
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  #
-# SOFTWARE.
# -# ============================================================================== # - -import os.path -from pathlib import Path -from typing import Dict, List, Literal, Optional, Union - -import tomlkit -from pydantic import BaseModel, Extra, Field, root_validator, validator -from tomlkit import TOMLDocument -from tomlkit.items import Array, Table - -from pydependance._core import ModuleNamespace - -# ========================================================================= # -# Configs # -# ========================================================================= # - - -class CfgBase(BaseModel): - class Config: - extra = Extra.forbid - - -class CfgRestrict(CfgBase): - # restrict the loaded namespace - restrict: Optional[List[str]] = None - restrict_mode: str = "children" - restrict_depth: int = -1 - restrict_depth_mode: str = "children" - - -class CfgNamespace(CfgRestrict): - name: str - # discover the namespace - pkg_roots: List[str] = Field(default_factory=list) - pkg_search: List[str] = Field(default_factory=list) - parents: List[str] = Field(default_factory=list) - - @root_validator() - def _validate_parents(cls, values): - if not (values["pkg_roots"] or values["pkg_search"], values["parents"]): - raise ValueError( - "must specify at least one of: [pkg_roots, pkg_search, parents]" - ) - return values - - -class CfgOutput_CmdRequirements(CfgBase): - mode: Literal["requirements"] - namespace: str - required: Optional[List[str]] = None - optional: Optional[List[str]] = None - path: Optional[Union[str, List[str]]] = None - allow_files_as_modules: bool = True - - -class CfgOutput_CmdImportsAll(CfgBase): - mode: Literal["imports_all"] - namespace: str - - -# union of all commands -CfgOutput = Union[CfgOutput_CmdRequirements, CfgOutput_CmdImportsAll] - - -class Config(CfgBase): - root: str - namespaces: List[CfgNamespace] = Field(default_factory=list) - outputs: List[CfgOutput] = Field(default_factory=list) - import_map: Dict[str, str] = Field(default_factory=dict) - versions: Dict[str, Optional[str]] = Field(default_factory=dict) - - @validator("root") - def _validate_root(cls, root): - if not os.path.isabs(root): - raise ValueError("root must be an absolute path") - return root - - @root_validator() - def _validate_paths(cls, values): - # update all relative paths to use the root - root = values["root"] - for namespace in values["namespaces"]: - namespace.pkg_roots = [os.path.join(root, p) for p in namespace.pkg_roots] - namespace.pkg_search = [os.path.join(root, p) for p in namespace.pkg_search] - return values - - @root_validator() - def _validate_deps(cls, values): - def _check(name: str): - if name.lower() != name: - raise ValueError( - f"package name should be lowercase. PYPI is case insensitive. {repr(name)} -> {repr(name.lower())}" - ) - if "-" in name: - raise ValueError( - f'package name should use underscores instead of dashes, PYPI is dash-unserscore insensitive. {repr(name)} -> {repr(name.replace("-", "_"))}' - ) - if not str.isidentifier(name): - raise ValueError( - f"package name is not a valid python identifier: {repr(name)}" - ) - - # check - # for name, version in values.get('import_map', {}).items(): - # _check(name) - for name, version in values.get("versions", {}).items(): - _check(name) - # done! - return values - - @root_validator() - def _validate_namespaces(cls, values): - namespaces = set() - # 1. 
make sure namespace names are unique, and that parent namespaces exist - for namespace in values.get("namespaces", []): - if namespace.name in namespaces: - raise ValueError( - f"namespace has duplicate name: {repr(namespace.name)}" - ) - for parent in namespace.parents: - if parent not in namespaces: - raise KeyError( - f"namespace: {repr(namespace.name)} has parent that does not yet exist: {repr(parent)}" - ) - namespaces.add(namespace.name) - # 2. make sure that outputs have valid namespaces references - for output in values.get("outputs", []): - if output.namespace not in namespaces: - raise KeyError( - f"output source namespace does not exist: {repr(output.namespace)}" - ) - # if (output.resolve_against is not None) and (output.resolve_against not in namespaces): - # raise KeyError(f'output resolve_against namespace does not exist: {repr(output.resolve_against)}') - # done! - return values - - -# ========================================================================= # -# Runner # -# ========================================================================= # - - -class Runner: - def __init__(self, config: Config): - self._config = config - self._namespaces = self._load_namespaces(config) - - @classmethod - def from_path(cls, path: Union[Path, str]): - path = os.path.abspath(path) - # handle loading the different file types - if path.endswith(".toml"): - import toml - - with open(path, "r") as fp: - config = toml.load(fp) - config = config.get("tool", {}).get("pydependance", {}) - elif path.endswith(".yaml") or path.endswith(".yml"): - import yaml - - with open(path, "r") as fp: - config = yaml.safe_load(fp) - else: - raise RuntimeError(f"unsupported path type: {path}") - # - root is by default the parent folder of the config - # this affects all relative paths in the config itself - config["root"] = os.path.abspath( - os.path.join(path, "..", config.get("root", ".")) - ) - # done! - return cls(Config(**config)) - - # --- loading from config --- # - - @classmethod - def _load_namespaces(cls, config: Config) -> Dict[str, ModuleNamespace]: - # load the namespaces in order - namespaces = {} - for cfg_namespace in config.namespaces: - namespaces[cfg_namespace.name] = cls._namespace_load( - namespaces, config, cfg_namespace - ) - return namespaces - - @classmethod - def _namespace_load( - cls, - namespaces: Dict[str, ModuleNamespace], - config: Config, - cfg_namespace: CfgNamespace, - ): - # at this point, everything is already validated! all references should exist! - namespace = ModuleNamespace() - # 1. load individual packages -- add root to path if need - if cfg_namespace.pkg_roots: - namespace.add_modules_from_packages( - os.path.join(config.root, path) for path in cfg_namespace.pkg_roots - ) - # 2. load packages by searching, like for PYTHONPATH -- add root to path if need - if cfg_namespace.pkg_search: - namespace.add_modules_from_python_paths( - os.path.join(config.root, path) for path in cfg_namespace.pkg_search - ) - # 3. add existing namespaces to this namespace - if cfg_namespace.parents: - for parent_name in cfg_namespace.parents: - modules = list(namespaces[parent_name].modules()) - namespace.add_modules(modules) - # 4. 
restrict the namespace - namespace = cls._namespace_restrict(cfg_namespace, namespace) - return namespace - - @classmethod - def _namespace_restrict( - cls, cfg_namespace: CfgNamespace, namespace: ModuleNamespace - ) -> ModuleNamespace: - if cfg_namespace.restrict: - namespace = namespace.restrict( - imports=cfg_namespace.restrict, mode=cfg_namespace.restrict_mode - ) - if cfg_namespace.restrict_depth >= 0: - namespace = namespace.restrict_depth( - depth=cfg_namespace.restrict_depth, - mode=cfg_namespace.restrict_depth_mode, - ) - return namespace - - def run(self): - for output in self._config.outputs: - fn = _COMMANDS[output.mode] - fn(self._config, output, self._namespaces[output.namespace]) - - -# ========================================================================= # -# Requirement Writers # -# ========================================================================= # - - -class ReqWriter: - def __init__(self, path: Optional[str]): - self.path = path - self.load() - - def write_deps(self, deps, modules, diff): - raise NotImplementedError - - def write_optional(self, module, deps, diff, already_included): - raise NotImplementedError - - def load(self): - raise NotImplementedError - - def save(self): - raise NotImplementedError - - def print(self): - raise NotImplementedError - - -class ReqWriterToml(ReqWriter): - def write_deps(self, deps, modules, diff): - self._fill_list(self.rdeps, deps) - if deps: - self.rdeps.comment( - f"[{', '.join(sorted(modules))}]" - + (f" -> [{', '.join(sorted(diff))}]" if diff else "") - ) - - def write_optional(self, module, deps, diff, already_included): - self.odeps.add(tomlkit.ws("\n")) - self.odeps.add(tomlkit.comment(f"{module} -> [{', '.join(sorted(diff))}]")) - if already_included: - self.odeps.add( - tomlkit.comment( - f"- already included: {', '.join(sorted(already_included))}" - ) - ) - self.odeps.append(module, self._fill_list(tomlkit.array(), deps)) - - def _fill_list(self, array: Array, items): - items = list(items) - array.clear() - if items: - for r in items: - array.add_line(r) - array.add_line(indent="") - return array - - def load(self): - # load pyproject.toml - if self.path and os.path.exists(self.path): - with open(self.path, "r") as fp: - self.pyproject: TOMLDocument = tomlkit.load(fp) - else: - self.pyproject = TOMLDocument() - # load dependencies - self.project: Table = self.pyproject.setdefault("project", {}) - self.project["dependencies"] = [] - self.project["optional-dependencies"] = {} - self.rdeps: Array = self.project["dependencies"] - self.odeps: Table = self.project["optional-dependencies"] - - def save(self): - if self.path: - with open(self.path, "w") as fp: - tomlkit.dump(self.pyproject, fp) - - def print(self): - print(tomlkit.dumps(self.pyproject)) - - -class ReqWriterTxt(ReqWriter): - def write_deps(self, deps, modules, diff): - self.lines.append( - f"# DEPENDENCIES: [{', '.join(sorted(modules))}]" - + (f" -> [{', '.join(sorted(diff))}]" if diff else "") - ) - self.lines.extend(deps) - - def write_optional(self, module, deps, diff, already_included): - self.lines.append("") - self.lines.append(f"# OPTIONAL: {module} -> [{', '.join(sorted(diff))}]") - self.lines.append( - f"# - already included: {', '.join(sorted(already_included))}" - ) - self.lines.extend(f"# | {d}" for d in deps) - - def load(self): - self.lines = [] - - def save(self): - with open(self.path, "w") as fp: - fp.writelines(f"{l}\n" for l in self.lines) - - def print(self): - for line in self.lines: - print(line) - - -# 
========================================================================= # -# COMMANDS # -# ========================================================================= # - - -def _command_requirements_collect( - output: CfgOutput_CmdRequirements, namespace: ModuleNamespace -): - # 1. all imports are required by default - required = output.required - if required is None: - required = set( - module.import_path - for module in namespace.modules_roots() - if (module.is_package or output.allow_files_as_modules) - ) - - # 2. all remaining imports are optional by default - optional = output.optional - if optional is None: - optional = set( - module.import_path - for module in namespace.modules_roots() - if (module.is_package or output.allow_files_as_modules) and (module.import_path not in required) - ) - - # 3. collect all the children imports in the modules - def _get_imports(module: str): - ns = namespace.restrict([module], mode="children") - orig = set(ns.imports_unique(roots=True, builtin=False)) - resolved = set( - ns.imports_resolved( - against=namespace, roots=True, builtin=False, mode="children" - ) - ) - return orig, resolved - - required = {module: _get_imports(module) for module in sorted(required)} - optional = {module: _get_imports(module) for module in sorted(optional)} - return required, optional - - -def _command_requirements_write( - writer: ReqWriter, config: Config, required: dict, optional: dict -): - def _replace(import_: str): - # import_ = import_.replace('-', '_').lower() - import_ = config.import_map.get(import_, import_) - version = config.versions.get(import_, None) - return f"{import_}{version}" if version else f"{import_}" - - # 1. get the required deps - required_modules = set(required.keys()) - required_orig = set(k for orig, resolved in required.values() for k in orig) - required_resolved = set(k for orig, resolved in required.values() for k in resolved) - required_diff = required_orig - required_resolved - required_deps = sorted(set(map(_replace, required_resolved))) - # - generate the required deps output - writer.write_deps(required_deps, required_modules, required_diff) - - # 2. 
add the optional items - for module, (mo, mr) in optional.items(): - # remove required from orig and resolved - module_orig = mo - required_resolved - module_resolved = mr - required_resolved - # output - module_diff = module_orig - module_resolved - module_deps = sorted(set(map(_replace, module_resolved))) - already_included = mr & required_resolved - # update - writer.write_optional(module, module_deps, module_diff, already_included) - - -def _command_requirements( - config: Config, output: CfgOutput_CmdRequirements, namespace: ModuleNamespace -): - # normalize - paths = output.path - if paths is None: - return - elif isinstance(paths, str): - paths = [paths] - - # collect requirements - required, optional = _command_requirements_collect(output, namespace) - - for path in paths: - # add root to path if needed - path = os.path.join(config.root, path) - if path.endswith(".toml"): - writer = ReqWriterToml(path) - elif path.endswith(".txt") or path.endswith(".in"): - writer = ReqWriterTxt(path) - else: - raise RuntimeError( - f"unsupported requirements file type, for file: {Path(path).name}" - ) - - # print seperator - print(f'\n# {"="*77} #\n# {path}\n# {"="*77} #\n') - - # load, write, save - writer.load() - _command_requirements_write(writer, config, required, optional) - writer.print() - writer.save() - - -_COMMANDS = {"requirements": _command_requirements} - - -# ========================================================================= # -# ENTRY # -# ========================================================================= # - - -if __name__ == "__main__": - - def cli(): - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument("-c", "--config", type=str, required=True) - args = parser.parse_args() - - runner = Runner.from_path(args.config) - - try: - runner.run() - except Exception as e: - print(f"error: {e} for config: {repr(args.config)}") - raise e - - cli() diff --git a/pydependance/_core.py b/pydependance/_core.py deleted file mode 100644 index 25cba14..0000000 --- a/pydependance/_core.py +++ /dev/null @@ -1,794 +0,0 @@ -# ============================================================================== # -# MIT License # -# # -# Copyright (c) 2022 Nathan # -# # -# Permission is hereby granted, free of charge, to any person obtaining a copy # -# of this software and associated documentation files (the "Software"), to deal # -# in the Software without restriction, including without limitation the rights # -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # -# copies of the Software, and to permit persons to whom the Software is # -# furnished to do so, subject to the following conditions: # -# # -# The above copyright notice and this permission notice shall be included in all # -# copies or substantial portions of the Software. # -# # -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # -# SOFTWARE. 
# -# ============================================================================== # - -import sys -from collections import defaultdict - -# check the python version -if sys.version_info < (3, 10): - print("please use python >= 3.10") - exit(1) - -import ast -import sys -import warnings -from pathlib import Path -from typing import Callable, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Union - -# ========================================================================= # -# Load Builtin Packages # -# ========================================================================= # - - -BUILTIN_PKGS = { - "__main__", - *sys.builtin_module_names, - *sys.stdlib_module_names, # python 3.10 -} - - -def import_get_root(import_: "ImportType") -> str: - root = import_to_keys(import_, check=False)[0] - return root - - -def import_is_builtin(import_: "ImportType") -> bool: - root = import_get_root(import_) - return root in BUILTIN_PKGS - - -# ========================================================================= # -# Ast Import Finder # -# ========================================================================= # - - -def ast_get_module_imports(path: Union[str, Path]) -> List[Tuple[List[str], bool]]: - imports = [] - ast_node_stack = [] - is_eval_stack = [True] - INDIRECT = {"FunctionDef"} - - class AstImportCollector(ast.NodeVisitor): - - # TODO: we should implement a basic interpreter to detect if imports are - # immediate or indirect, for example imports at the root of a module or - # inside a class would evaluate immediately, but imports inside a function - # will probably be lazily imported, and can be marked as such. - - def visit(self, node): - # basic interpreter - is_eval = is_eval_stack[-1] and (node.__class__.__name__ not in INDIRECT) - ast_node_stack.append(node) - is_eval_stack.append(is_eval) - # continue recursion - super().visit(node) - # undo - ast_node_stack.pop() - is_eval_stack.pop() - - def visit_Import(self, node): - # eg. import pkg.submodule - imports.extend( - (n.name.split("."), False, is_eval_stack[-1], tuple(ast_node_stack)) - for n in node.names - ) - return node - - def visit_ImportFrom(self, node): - assert node.level in (0, 1) # node.names: from * import name, ... - # eg: from . import ? - # eg: from .submodule import ? - # eg: from pkg.submodule import ? - import_keys = node.module.split(".") if node.module else [] - is_relative = node.level != 0 - imports.append( - (import_keys, is_relative, is_eval_stack[-1], tuple(ast_node_stack)) - ) - return node - - # collect import from file - with open(path) as f: - AstImportCollector().generic_visit(node=ast.parse(f.read())) - return imports - - -# ========================================================================= # -# Module Helper # -# ========================================================================= # - - -INIT_PY = "__init__.py" - - -ImportKey = Tuple[str, ...] 
-ImportType = Union[str, ImportKey, "Import", "Module"] - - -def _import_to_keys(import_: ImportType) -> ImportKey: - # split the import if needed - if isinstance(import_, str): - import_ = import_.split(".") - elif isinstance(import_, Import): - import_ = import_.target_keys - elif isinstance(import_, Module): - import_ = import_.import_keys - return tuple(import_) - - -def import_to_keys(import_: ImportType, check: bool = True) -> ImportKey: - import_keys, orig = _import_to_keys(import_), import_ - # check, all parts must be identifiers, and there must be at least one part - if check: - import_check_keys(import_keys, orig=orig) - return import_keys - - -def import_check_keys(import_keys: ImportKey, orig=None) -> ImportKey: - if orig is None: - orig = import_keys - if not import_keys: - raise ValueError( - f"import path must have at least one part for: {repr(import_keys)}" - ) - if not isinstance(import_keys, tuple): - raise TypeError( - f"import keys must be a tuple, got: {type(import_keys)} for: {repr(orig)}" - ) - for part in import_keys: - if not isinstance(part, str): - raise TypeError( - f"import part: {repr(part)} is not a string, got type: {type(part)}, obtained from: {repr(import_keys)}" - ) - if not part.isidentifier(): - warnings.warn( - f"import part: {repr(part)} is not a valid identifier, obtained from: {repr(import_keys)}" - ) - return import_keys - - -def normalize_imports_pipe( - imports: Iterable[ImportType], - roots: bool = False, - builtin: bool = True, - keys: bool = False, -) -> Union[Iterable[ImportKey], Iterable[str]]: - imports = (import_to_keys(imp) for imp in imports) - if not builtin: - imports = (imp for imp in imports if imp[0] not in BUILTIN_PKGS) - if roots: - imports = (imp[0:1] for imp in imports) - if not keys: - imports = (".".join(imp) for imp in imports) - return imports - - -def is_python_module(path: Path, validate_name: bool = False) -> bool: - is_module = path.is_file() and path.name.endswith(".py") - if validate_name: - is_module = is_module and path.name[:-3].isidentifier() - return is_module - - -def is_python_package(path: Path, validate_name: bool = False) -> bool: - is_package = path.is_dir() and path.joinpath(INIT_PY).is_file() - if validate_name: - is_package = is_package and path.name.isidentifier() - return is_package - - -def is_child_import(parent, child) -> bool: - parent = import_to_keys(parent) - child = import_to_keys(child) - if len(child) < len(parent): - return False - return parent == child[: len(parent)] - - -def find_modules( - root: Path, - max_depth: int = -1, - skip_root_files: bool = False, -): - def _recurse(path: Path, parent_keys: ImportKey, depth: int): - if depth > max_depth >= 0: - return - - # eg. .py - if is_python_module(path): - if skip_root_files and depth == 0: - return - assert path.name != INIT_PY - yield path, (*parent_keys, path.name[:-3]) - - # eg. /__init__.py - elif is_python_package(path): - keys = (*parent_keys, path.name) - yield path, keys - # continue recursively, making sure to skip __init__.py files - for p in path.iterdir(): - if p.name != INIT_PY: - yield from _recurse(p, parent_keys=keys, depth=depth + 1) - - root = Path(root) - # make sure that if we skip root __init__.py files - if root.name == INIT_PY: - warnings.warn( - f"root cannot be an {INIT_PY} file, returning no modules for: {root.resolve()}" - ) - return - # find values! 
- yield from ( - Module(module_path, import_keys) - for module_path, import_keys in _recurse(root, parent_keys=(), depth=0) - ) - - -def _yield_import_and_keys( - node: Union["Module", "ModuleNamespace"], - roots: bool = False, - builtin: bool = True, -) -> Iterable[Tuple[str, "Import"]]: - for import_ in node.imports(builtin=builtin): - if roots: - key = import_.target_root - else: - key = import_.target_path - yield key, import_ - - -def _yield_imports( - node: Union["Module", "ModuleNamespace"], - roots: bool = False, - builtin: bool = True, -) -> Iterable[str]: - visited = set() - for key, import_ in _yield_import_and_keys(node, roots=roots, builtin=builtin): - # return the result if it has not been seen - if key not in visited: - visited.add(key) - yield key - - -def _group_imports( - node: Union["Module", "ModuleNamespace"], - roots: bool = False, - builtin: bool = True, -) -> Dict[str, List["Import"]]: - groups = defaultdict(list) - for key, import_ in _yield_import_and_keys(node, roots=roots, builtin=builtin): - groups[key].append(import_) - return dict(groups) - - -def _group_imports_from_modules( - node: Union["Module", "ModuleNamespace"], - roots: bool = False, - builtin: bool = True, -) -> Dict[str, Set["Module"]]: - groups = defaultdict(set) - for key, import_ in _yield_import_and_keys(node, roots=roots, builtin=builtin): - groups[key].add(import_.source_module) - return groups - - -# ========================================================================= # -# Data Structures # -# ========================================================================= # - - -class Import: - @classmethod - def from_module_perspective( - cls, - module: "Module", - keys: Union[str, Sequence[str]], - is_relative: bool, - is_immediate_eval: bool, - ast_parents: Tuple[ast.AST], - ): - orig = keys - if isinstance(keys, str): - keys = keys.split(".") - keys = tuple(keys) - if is_relative: - keys = module.import_keys[:-1] + keys - import_check_keys(keys, orig=orig) - return Import( - keys, - source_module=module, - is_immediate_eval=is_immediate_eval, - ast_parents=ast_parents, - ) - - def __init__( - self, - target: Union[str, Sequence[str]], - source_module: "Module", - is_immediate_eval: Optional[bool] = None, - ast_parents: Optional[Tuple[ast.AST]] = None, - ): - self._target_keys = import_to_keys(target) - self._source_module = source_module - self._is_immediate_eval = is_immediate_eval - self._ast_parents = ast_parents - - def __repr__(self): - return f"{self.__class__.__name__}<{self.target_path}>" - - @property - def is_immediate_eval(self) -> Optional[bool]: - return self._is_immediate_eval - - @property - def ast_parents(self) -> Optional[Tuple[ast.AST, ...]]: - return self._ast_parents - - @property - def target_keys(self) -> ImportKey: - return self._target_keys - - @property - def target_path(self) -> str: - return ".".join(self._target_keys) - - @property - def target_root(self) -> str: - return self.target_keys[0] - - @property - def target_depth(self) -> int: - return len(self.target_keys) - - @property - def source_module(self) -> "Module": - return self._source_module - - def __eq__(self, other): - if isinstance(other, (Module, Import, str, tuple)): - return self.target_keys == import_to_keys(other, check=False) - return False - - def __lt__(self, other): - return self.target_keys < other.target_keys - - def __hash__(self): - return hash("" + self.target_path) - - -class Module: - def __init__( - self, - path: Union[str, Path], - import_: Union[str, Sequence[str]], - _load_: bool 
= True, - ): - # check the path - path = Path(path) - if is_python_module(path): - self._is_package = path.name == INIT_PY - elif is_python_package(path): - self._is_package = True - path = path.joinpath(INIT_PY) - else: - raise ValueError(f"not a valid python module or package: {path}") - # initialize - self._abs_path: Path = path.absolute() - self._import_keys = import_to_keys(import_) - # load imports - if _load_: - self._imports = [ - Import.from_module_perspective( - self, - keys=keys, - is_relative=is_relative, - is_immediate_eval=is_immediate_eval, - ast_parents=ast_parents, - ) - for keys, is_relative, is_immediate_eval, ast_parents in ast_get_module_imports( - self.path - ) - ] - else: - self._imports = [] - - def __repr__(self): - return f"{self.__class__.__name__}<{self.import_path}>" - - def __eq__(self, other): - if isinstance(other, (Module, Import, str, tuple)): - return self.import_keys == import_to_keys(other, check=False) - return False - - def __lt__(self, other): - return self.import_keys < other.import_keys - - def __hash__(self): - return hash("" + self.import_path) - - @property - def is_package(self) -> bool: - return self._is_package - - @property - def is_root(self) -> bool: - return self.import_depth == 1 - - @property - def path(self) -> Path: - return self._abs_path - - @property - def import_keys(self) -> ImportKey: - return self._import_keys - - @property - def import_path(self) -> str: - return ".".join(self._import_keys) - - @property - def import_root(self) -> str: - return self.import_keys[0] - - @property - def import_depth(self) -> int: - return len(self.import_keys) - - def imports(self, builtin: bool = True) -> Iterable[Import]: - if builtin: - yield from self._imports - else: - yield from ( - imp for imp in self._imports if imp.target_root not in BUILTIN_PKGS - ) - - def imports_unique( - self, roots: bool = False, builtin: bool = True - ) -> Iterable[str]: - yield from _yield_imports(self, roots=roots, builtin=builtin) - - def imports_grouped( - self, roots: bool = False, builtin: bool = True - ) -> Dict[str, List[Import]]: - return _group_imports(self, roots=roots, builtin=builtin) - - def imports_ref_modules( - self, roots: bool = False, builtin: bool = True - ) -> Dict[str, Set["Module"]]: - return _group_imports_from_modules(self, roots=roots, builtin=builtin) - - -class ModuleNamespace: - - _modules: Dict[ImportKey, Module] - - def __init__(self): - self._modules = {} - # cache used to help speed up some functions - # this might use a lot of memory, so we make - # sure to limit its size when used - self._cache = None - - def copy(self) -> "ModuleNamespace": - namespace = ModuleNamespace() - namespace._modules = dict(self._modules) - return namespace - - def __repr__(self): - return f'{self.__class__.__name__}<{", ".join(".".join(k) for k in self._modules.keys() if len(k) == 1)}>' - - # ~=~=~=~=~=~=~ # - # Add Imports # - # ~=~=~=~=~=~=~ # - - def _add_import_unchecked(self, import_: Import): - module_keys = import_.source_module.import_keys - module = self._modules.get(module_keys, None) - # create the module if missing - if module is None: - module = Module(import_.source_module.path, module_keys, _load_=False) - self._modules[module_keys] = module - # add the import to the module - module._imports.append(import_) - - # ~=~=~=~=~=~=~ # - # Add Modules # - # ~=~=~=~=~=~=~ # - - def add_modules(self, modules: Sequence[Module]) -> "ModuleNamespace": - for module in modules: - if module.import_keys in self._modules: - raise RuntimeError( - 
f"module {repr(module.import_path)} has already been added to namespace" - ) - for module in modules: - self._modules[module.import_keys] = module - return self - - def add_modules_from_packages( - self, roots: Iterable[Union[str, Path]] - ) -> "ModuleNamespace": - modules = [m for root in roots for m in find_modules(root)] - self.add_modules(modules) - return self - - def add_modules_from_python_paths( - self, python_paths: Optional[Iterable[Union[str, Path]]] - ) -> "ModuleNamespace": - if python_paths is None: - python_paths = sys.path - paths = [ - path - for python_path in python_paths - for path in Path(python_path).iterdir() - if is_python_package(path) or is_python_module(path) - ] - self.add_modules_from_packages(paths) - return self - - # ~=~=~=~=~=~=~ # - # Filtering # - # ~=~=~=~=~=~=~ # - - def filtered( - self, - *, - keep: Callable[[Module], bool] = None, - remove: Callable[[Module], bool] = None, - ) -> "ModuleNamespace": - result = self.copy() - if keep: - result._modules = {k: m for k, m in result._modules.items() if keep(m)} - if remove: - result._modules = { - k: m for k, m in result._modules.items() if not remove(m) - } - return result - - def restrict(self, imports, mode: str = "exact"): - if isinstance(imports, (str, tuple, Import, Module)): - imports = [imports] - imports = set(import_to_keys(imp) for imp in imports) - # restrict based on the mode - if mode == "exact": - return self.filtered(keep=lambda m: m.import_keys in imports) - elif mode == "children": - return self.filtered( - keep=lambda m: any( - is_child_import(parent=keys, child=m) for keys in imports - ) - ) - elif mode == "root_children": - return self.filtered( - keep=lambda m: any( - is_child_import(parent=keys[0], child=m) for keys in imports - ) - ) - else: - raise KeyError(f"invalid restrict mode: {repr(mode)}") - - def restrict_depth(self, depth: int, mode: str = "exact"): - if depth < 0: - return self - if mode == "exact": - return self.filtered(keep=lambda m: len(m.import_keys) == depth) - elif mode == "children": - return self.filtered(keep=lambda m: len(m.import_keys) >= depth) - else: - raise KeyError(f"invalid restrict mode: {repr(mode)}") - - # ~=~=~=~=~=~=~ # - # Getters # - # ~=~=~=~=~=~=~ # - - def __getitem__(self, import_: ImportType): - import_ = import_to_keys(import_) - return self._modules[import_] - - def __contains__(self, import_: ImportType): - import_ = import_to_keys(import_) - return import_ in self._modules - - def __iter__(self) -> Iterable[Module]: - yield from self._modules.values() - - def modules(self) -> Iterable[Module]: - yield from self._modules.values() - - def modules_roots(self) -> Iterable[Module]: - for k, m in self._modules.items(): - if len(k) == 1: - yield m - - def imports(self, builtin: bool = True) -> Iterable[Import]: - for module in self._modules.values(): - yield from module.imports(builtin=builtin) - - def imports_unique( - self, roots: bool = False, builtin: bool = True - ) -> Iterable[str]: - yield from _yield_imports(self, roots=roots, builtin=builtin) - - def imports_grouped( - self, roots: bool = False, builtin: bool = True - ) -> Dict[str, List[Import]]: - return _group_imports(self, roots=roots, builtin=builtin) - - def imports_ref_modules( - self, roots: bool = False, builtin: bool = True - ) -> Dict[str, Set[Module]]: - return _group_imports_from_modules(self, roots=roots, builtin=builtin) - - def imports_resolved( - self, - against: "ModuleNamespace" = None, - roots: bool = False, - builtin: bool = True, - mode: str = "exact", - ) -> 
Set[str]: - if against is None: - against = self - # get the unique imports, and flatten imports - # using keys in the specified namespace - return against.resolve_imports( - imports=self.imports_unique(roots=False, builtin=builtin), - roots=roots, - builtin=builtin, - mode=mode, - ) - - # ~=~=~=~=~=~=~ # - # Resolving # - # ~=~=~=~=~=~=~ # - - def resolve_imports( - self, - imports: Iterable[ImportType], - roots: bool = False, - builtin: bool = True, - mode: str = "exact", - ) -> Set[str]: - """ - This function only resolved the specified imports based on the current - namespace, by performing a BFS - - This nice thing is that you can restrict adding entries based on the `mode` - to the "exact" files visited, or you can be safe by adding all "children", - or even the "root_children" of the visited imports - - ALGORITHM: - * perform a bfs, replacing keys that are visited with the designation keys - - keys can only be visited if they are in the current namespace - - this can be re-written as 1. perform bfs 2. remove keys in namespace - """ - resolved = self._resolve_imports( - imports=imports, - mode=mode, - _restrict_cache_=None, - ) - resolved = set( - normalize_imports_pipe( - resolved, - roots=roots, - builtin=builtin, - keys=False, - ) - ) - return resolved - - def _resolve_imports( - self, - imports: Iterable[ImportType], - mode: str, - _restrict_cache_: Optional[Dict[ImportKey, Set[ImportKey]]], - ) -> Set[ImportKey]: - if _restrict_cache_ is None: - _restrict_cache_ = {} - - def get_restricted_imports(keys: ImportKey) -> Set[ImportKey]: - unique = _restrict_cache_.get(keys, None) - if unique is None: - unique = set( - imp.target_keys - for imp in self.restrict(keys, mode=mode).imports(builtin=True) - ) - _restrict_cache_[keys] = unique - return unique - - # 1. BFS - stack: List[ImportKey] = list(set(import_to_keys(i) for i in imports)) - visited: Set[ImportKey] = set() - while stack: - current = stack.pop() - visited.add(current) - for imp in get_restricted_imports(current): - if imp in visited: - continue - stack.append(imp) - - # 2. DELETE OLD RESULTS - visited -= self._modules.keys() - - # convert the imports back to strings - return visited - - def resolve( - self, - namespace: "ModuleNamespace" = None, - roots: bool = False, - builtin: bool = True, - mode: str = "exact", - ) -> Dict[str, Set[str]]: - # multiple packages in the same project may depend on each other - # - this function finds those imports and replaces them with - # the imports from the other package, effectively finding all - # required parent dependencies in the tree - against = self - if namespace is None: - namespace = self - - # speed things up by reusing results - _restrict_cache_ = {} - - # for each module, BFS all the imports - # - this is not as efficient as doing everything in a pass - # over the actual imports and replacing everything as we - # go, but conceptually, this is much easier to manage! - module_imports = {} - for key, module in namespace._modules.items(): - module_imports[key] = against._resolve_imports( - imports=module.imports(), - mode=mode, - _restrict_cache_=_restrict_cache_, - ) - # update the cache based on the current results to improve future speed! - # this is duplicating conversion... 
- _restrict_cache_[module.import_keys] = module_imports[key] - - # normalize the final results - module_imports = { - ".".join(k): set( - normalize_imports_pipe( - resolved, - roots=roots, - builtin=builtin, - keys=False, - ) - ) - for k, resolved in module_imports.items() - } - - return module_imports - - -# ========================================================================= # -# EXPORT # -# ========================================================================= # - -__all__ = ( - "Import", - "Module", - "ModuleNamespace", - "import_get_root", - "import_is_builtin", - "import_to_keys", -) diff --git a/pydependance/__init__.py b/pydependence/__init__.py similarity index 92% rename from pydependance/__init__.py rename to pydependence/__init__.py index 4421ec1..38df5a2 100644 --- a/pydependance/__init__.py +++ b/pydependence/__init__.py @@ -1,7 +1,7 @@ # ============================================================================== # # MIT License # # # -# Copyright (c) 2022 Nathan # +# Copyright (c) 2024 Nathan Juraj Michlo # # # # Permission is hereby granted, free of charge, to any person obtaining a copy # # of this software and associated documentation files (the "Software"), to deal # @@ -21,5 +21,3 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # # SOFTWARE. # # ============================================================================== # - -from pydependance._core import Import, Module, ModuleNamespace, import_is_builtin diff --git a/pydependence/__main__.py b/pydependence/__main__.py new file mode 100644 index 0000000..b822bad --- /dev/null +++ b/pydependence/__main__.py @@ -0,0 +1,551 @@ +# ============================================================================== # +# MIT License # +# # +# Copyright (c) 2024 Nathan Juraj Michlo # +# # +# Permission is hereby granted, free of charge, to any person obtaining a copy # +# of this software and associated documentation files (the "Software"), to deal # +# in the Software without restriction, including without limitation the rights # +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # +# copies of the Software, and to permit persons to whom the Software is # +# furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in all # +# copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. 
# +# ============================================================================== # +import sys +from collections import defaultdict +from pathlib import Path +from typing import Annotated, Dict, List, Literal, Optional, Union + +import pydantic +from packaging.requirements import Requirement + +from pydependence._core.modules_scope import ModulesScope, RestrictMode, RestrictOp +from pydependence._core.requirements_gen import ( + WriteMode, + WriteRequirement, + WriteRules, + generate_output_requirements, +) +from pydependence._core.requirements_map import ( + DEFAULT_REQUIREMENTS_ENV, + ImportMatcherBase, + ImportMatcherGlob, + ImportMatcherScope, + RequirementsMapper, +) +from pydependence._core.requirements_writers import read_and_dump_toml_imports +from pydependence._core.utils import ( + apply_root_to_path_str, + is_relative_path, + load_toml_document, +) + +# ========================================================================= # +# CONFIGS # +# ========================================================================= # + + +class _WriteRules(pydantic.BaseModel, extra="forbid"): + builtin: Optional[WriteMode] = None + start_scope: Optional[WriteMode] = None + lazy: Optional[WriteMode] = None + + @classmethod + def make_default(cls): + return cls( + rule_is_builtin=WriteMode.exclude, + rule_start_scope=WriteMode.exclude, + rule_is_lazy=WriteMode.comment, + ) + + def get_write_rules(self) -> WriteRules: + assert self.builtin is not None + assert self.start_scope is not None + assert self.lazy is not None + return WriteRules( + write_mode_is_builtin=self.builtin, + write_mode_start_scope=self.start_scope, + write_mode_is_lazy=self.lazy, + ) + + def set_defaults(self, defaults: "_WriteRules"): + assert defaults.builtin is not None + assert defaults.start_scope is not None + assert defaults.lazy is not None + if self.builtin is None: + self.builtin = defaults.builtin + if self.start_scope is None: + self.start_scope = defaults.start_scope + if self.lazy is None: + self.lazy = defaults.lazy + + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # +# CONFIG - OUTPUT # +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # + + +class _Output(pydantic.BaseModel, extra="forbid"): + # resolve + scope: str + start_scope: Optional[str] = None + skip_lazy: bool = False + + # env + env: str = DEFAULT_REQUIREMENTS_ENV + + # output filtering + write_rules: _WriteRules = pydantic.Field(default_factory=_WriteRules) + + # output + output_mode: str + output_file: str + output_name: Optional[str] = None + + def get_output_name(self) -> str: + if self.output_name is not None: + return self.output_name + elif self.start_scope is not None: + return self.start_scope + else: + return self.scope + + def _write_requirements(self, requirements: List[WriteRequirement]): + raise NotImplementedError( + f"tried to write imports for {repr(self.get_output_name())}, write_imports not implemented for {self.__class__.__name__}" + ) + + def generate_and_write_requirements( + self, + loaded_scopes: Dict[str, ModulesScope], + requirements_mapper: RequirementsMapper, + ): + requirements = generate_output_requirements( + scope=loaded_scopes[self.scope], + start_scope=loaded_scopes[self.start_scope] if self.start_scope else None, + requirements_mapper=requirements_mapper, + requirements_env=self.env, + write_rules=self.write_rules.get_write_rules(), + ) + return self._write_requirements(requirements=requirements) + + +class _OutputRequirements(_Output): + output_mode: 
Literal["requirements"] + + def _write_requirements(self, requirements: List[WriteRequirement]): + raise NotImplementedError + + +class _OutputPyprojectOptionalDeps(_Output): + output_mode: Literal["optional-dependencies"] + output_file: Optional[str] = None + + def _write_requirements(self, requirements: List[WriteRequirement]): + read_and_dump_toml_imports( + file=self.output_file, + keys=["project", "optional-dependencies", self.get_output_name()], + requirements=requirements, + ) + + +class _OutputPyprojectDeps(_Output): + output_mode: Literal["dependencies"] + output_file: Optional[str] = None + + def _write_requirements(self, requirements: List[WriteRequirement]): + read_and_dump_toml_imports( + file=self.output_file, + keys=["project", "dependencies"], + requirements=requirements, + ) + + +CfgResolver = Annotated[ + Union[ + _OutputRequirements, + _OutputPyprojectOptionalDeps, + _OutputPyprojectDeps, + ], + pydantic.Field(discriminator="output_mode", union_mode="left_to_right"), +] + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # +# CONFIG - PACKAGES # +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # + + +class CfgVersion(pydantic.BaseModel, extra="forbid", arbitrary_types_allowed=True): + # the pip install requirement + requirement: str + # the imports to replace + import_: Optional[str] = pydantic.Field(default=None, alias="import") + scope: Optional[str] = None + # only apply this import to this environment + env: str = DEFAULT_REQUIREMENTS_ENV + + @property + def parsed_requirement(self) -> Requirement: + return Requirement(self.requirement) + + @property + def package(self) -> str: + return self.parsed_requirement.name + + @classmethod + def from_string(cls, requirement: str): + return cls(requirement=requirement) + + def get_import_matcher( + self, loaded_scopes: "Dict[str, ModulesScope]" + ) -> ImportMatcherBase: + if self.scope is not None: + if self.import_ is not None: + raise ValueError(f"cannot specify both scope and import for: {self}") + else: + return ImportMatcherScope(scope=loaded_scopes[self.scope]) + else: + if self.import_ is None: + raise ValueError(f"must specify either scope or import for: {self}") + else: + return ImportMatcherGlob(import_glob=self.import_) + + @pydantic.model_validator(mode="after") + @classmethod + def _validate_model_before(cls, v: "CfgVersion"): + if not str.isidentifier(v.env.replace("-", "_")): + raise ValueError( + f"env must be a valid identifier (with hyphens replaced with underscores), got: {v.env}" + ) + if v.import_ is None and v.scope is None: + v.import_ = f"{v.package}.*" # wildcard + elif v.import_ is not None and v.scope is not None: + raise ValueError(f"cannot specify both scope and import for: {v}") + return v + + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # +# CONFIG - SCOPE # +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # + + +class CfgScope(pydantic.BaseModel, extra="forbid"): + # name + # - must be unique across all scopes & sub-scopes + name: str + + # parents + parents: List[str] = pydantic.Field(default_factory=list) + + # search paths + search_paths: List[str] = pydantic.Field(default_factory=list) + pkg_paths: List[str] = pydantic.Field(default_factory=list) + + # extra packages + packages: List[str] = pydantic.Field(default_factory=list) + + # filtering: limit > exclude > [include!!] (in that order) + # - order is important because it allows us to remove a band of modules + # e.g. 
limit=foo.bar, exclude=foo.bar.baz, include=foo.bar.baz.qux + # if order of include and exclude were swapped, then the exclude would + # remove the module after the include added it back in + limit: Optional[List[str]] = None + exclude: Optional[List[str]] = None + # include: Optional[str] = None # NOT IMPLEMENTED BECAUSE IT IS REDUNDANT, AND `PARENTS` CAN BE USED INSTEAD + + # sub-scopes + # - name to import path map + # - names must be unique across all scopes & sub-scopes + # - imports must belong to the scope + subscopes: Dict[str, str] = pydantic.Field(default_factory=dict) + + @pydantic.field_validator("search_paths", mode="before") + @classmethod + def _validate_search_paths(cls, v): + return [v] if isinstance(v, str) else v + + @pydantic.field_validator("pkg_paths", mode="before") + @classmethod + def _validate_pkg_paths(cls, v): + return [v] if isinstance(v, str) else v + + @pydantic.field_validator("limit", mode="before") + @classmethod + def _validate_limit(cls, v): + return [v] if isinstance(v, str) else v + + @pydantic.field_validator("exclude", mode="before") + @classmethod + def _validate_exclude(cls, v): + return [v] if isinstance(v, str) else v + + def make_module_scope(self, loaded_scopes: "Dict[str, ModulesScope]" = None): + m = ModulesScope() + + # 1. load parents + if self.parents: + if loaded_scopes is None: + raise ValueError("loaded_scopes must be provided if parents are used!") + for parent in self.parents: + if parent not in loaded_scopes: + raise ValueError( + f"parent scope {repr(parent)} has not yet been created, are you sure the order of definitions is correct?" + ) + m.add_modules_from_scope(loaded_scopes[parent]) + + # 2. load new search paths and packages + for path in self.search_paths: + m.add_modules_from_search_path(Path(path), tag=self.name) + for path in self.pkg_paths: + m.add_modules_from_package_path(Path(path), tag=self.name) + + # 3. add extra packages + if self.packages: + raise NotImplementedError + # m.add_modules_from_raw_imports(imports=self.packages) + + # 4. filter everything + # - a. limit, b. exclude, [c. include (replaced with parents)] + if self.limit: + m = m.get_restricted_scope( + imports=self.limit, mode=RestrictMode.CHILDREN, op=RestrictOp.LIMIT + ) + if self.exclude: + m = m.get_restricted_scope( + imports=self.exclude, + mode=RestrictMode.CHILDREN, + op=RestrictOp.EXCLUDE, + ) + + # done! + return m + + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # +# CONFIG - ROOT # +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # + + +class PydependenceCfg(pydantic.BaseModel, extra="forbid"): + # default root is relative to the parent of the pyproject.toml file + # and is the folder containing the repo of the pyproject.toml file + default_root: str = ".." 
+ + # default write modes + default_write_rules: _WriteRules = pydantic.Field( + default_factory=_WriteRules.make_default + ) + + # config + strict_requirements_map: bool = True + + # package versions + versions: List[CfgVersion] = pydantic.Field(default_factory=list) + + # resolve + scopes: List[CfgScope] = pydantic.Field(default_factory=dict) + + # outputs + resolvers: List[CfgResolver] = pydantic.Field(default_factory=list) + + @pydantic.field_validator("versions", mode="before") + @classmethod + def _validate_versions(cls, v, values): + versions = [] + reqs_envs = set() # pairs of tags and req names must be unique for now + for x in v: + if isinstance(x, str): + x = CfgVersion.from_string(x) + else: + x = CfgVersion.model_validate(x) + req_env = (x.package, x.env) + if req_env in reqs_envs: + raise ValueError( + f"requirement {repr(x.package)} and env {repr(x.env)} combination is defined multiple times! ({repr(x.requirement)})" + ) + reqs_envs.add(req_env) + versions.append(x) + return versions + + @pydantic.model_validator(mode="after") + @classmethod + def _validate_model(cls, cfg: "PydependenceCfg"): + # 1. check that scope names are all unique + scope_names = set() + for scope in cfg.scopes: + if scope.name in scope_names: + raise ValueError(f"scope name {repr(scope.name)} is not unique!") + scope_names.add(scope.name) + + # 2. check that all sub-scope names are unique + for scope in cfg.scopes: + for subscope_name in scope.subscopes: + if subscope_name in scope_names: + raise ValueError( + f"sub-scope name {repr(subscope_name)} is not unique!" + ) + scope_names.add(subscope_name) + + # 3. check that all packages + # TODO + + # 4. check that the default root is a relative path + # '..' should be considered a relative path + # '.' should be considered a relative path + # `not is_absolute` is not enough! + if not is_relative_path(cfg.default_root): + raise ValueError( + f"default_root must be a relative path, got: {repr(cfg.default_root)}" + ) + return cfg + + def apply_defaults(self, *, pyproject_path: Path): + if pyproject_path.name != "pyproject.toml": + raise ValueError( + f"path must be a pyproject.toml file, got: {pyproject_path}" + ) + + # helper + self.default_root = apply_root_to_path_str( + pyproject_path.parent, self.default_root + ) + s = lambda x: apply_root_to_path_str(self.default_root, x) + + # apply to all paths + for scope in self.scopes: + scope.search_paths = [s(x) for x in scope.search_paths] + scope.pkg_paths = [s(x) for x in scope.pkg_paths] + for output in self.resolvers: + if output.output_file is not None: + output.output_file = s(output.output_file) + if output.output_file is None: + if isinstance( + output, (_OutputPyprojectDeps, _OutputPyprojectOptionalDeps) + ): + output.output_file = s(pyproject_path) + + # also apply all default write modes + for output in self.resolvers: + output.write_rules.set_defaults(self.default_write_rules) + + def load_scopes(self) -> Dict[str, ModulesScope]: + # resolve all scopes + loaded_scopes: "Dict[str, ModulesScope]" = {} + for scope_cfg in self.scopes: + scope = scope_cfg.make_module_scope(loaded_scopes=loaded_scopes) + loaded_scopes[scope_cfg.name] = scope + # now create sub-scopes + for subcol_name, subcol_import_root in scope_cfg.subscopes.items(): + subscope = scope.get_restricted_scope( + imports=[subcol_import_root], mode=RestrictMode.CHILDREN + ) + loaded_scopes[subcol_name] = subscope + # done! 
+ return loaded_scopes + + def make_requirements_mapper( + self, + loaded_scopes: "Dict[str, ModulesScope]", + ): + env_matchers = defaultdict(list) + for v in self.versions: + import_matcher = v.get_import_matcher(loaded_scopes=loaded_scopes) + pair = (v.requirement, import_matcher) + env_matchers[v.env].append(pair) + env_matchers = dict(env_matchers) + + return RequirementsMapper( + env_matchers=env_matchers, + strict=self.strict_requirements_map, + ) + + def write_all_outputs(self, loaded_scopes: "Dict[str, ModulesScope]"): + # check that scope output names are unique + names = set() + for output in self.resolvers: + name = output.get_output_name() + if name in names: + raise ValueError(f"output name {repr(name)} is not unique!") + names.add(name) + + # check that the scopes exists + for output in self.resolvers: + if output.scope not in loaded_scopes: + raise ValueError( + f"output scope {repr(output.scope)} does not exist! Are you sure it has been defined?" + ) + if output.start_scope and output.start_scope not in loaded_scopes: + raise ValueError( + f"output start_scope {repr(output.start_scope)} does not exist! Are you sure it has been defined?" + ) + + # make the mapper + requirements_mapper = self.make_requirements_mapper(loaded_scopes=loaded_scopes) + + # resolve the scopes! + for output in self.resolvers: + output.generate_and_write_requirements( + loaded_scopes=loaded_scopes, + requirements_mapper=requirements_mapper, + ) + + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # +# CONFIG - PYPROJECT # +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # + + +class PyprojectTomlTools(pydantic.BaseModel, extra="ignore"): + pydependence: PydependenceCfg + + +class PyprojectToml(pydantic.BaseModel, extra="ignore"): + tool: PyprojectTomlTools = pydantic.Field(default_factory=PyprojectTomlTools) + + @classmethod + def from_pyproject(cls, path: Path) -> "PyprojectToml": + # 1. load pyproject.toml + toml = load_toml_document(path) + # 2. validate the model + pyproject = PyprojectToml.model_validate(toml.unwrap()) + # 3. override paths in cfg using the default root + pyproject.tool.pydependence.apply_defaults( + pyproject_path=path, + ) + return pyproject + + +# ========================================================================= # +# COLLECT MODULES # +# ========================================================================= # + + +def pydeps(): + script, file = sys.argv + # 1. get absolute + file = Path(file).resolve().absolute() + # 2. load pyproject.toml + pyproject = PyprojectToml.from_pyproject(file) + # 3. generate search spaces, recursively resolving! + loaded_scopes = pyproject.tool.pydependence.load_scopes() + # 4. 
generate outputs + pyproject.tool.pydependence.write_all_outputs(loaded_scopes) + + +# ========================================================================= # +# CLI # +# ========================================================================= # + + +if __name__ == "__main__": + pydeps() diff --git a/pydependance/_colors.py b/pydependence/_colors.py similarity index 97% rename from pydependance/_colors.py rename to pydependence/_colors.py index 9896507..364b6e6 100644 --- a/pydependance/_colors.py +++ b/pydependence/_colors.py @@ -1,7 +1,7 @@ # ============================================================================== # # MIT License # # # -# Copyright (c) 2022 Nathan # +# Copyright (c) 2024 Nathan Juraj Michlo # # # # Permission is hereby granted, free of charge, to any person obtaining a copy # # of this software and associated documentation files (the "Software"), to deal # diff --git a/pydependence/_core/__init__.py b/pydependence/_core/__init__.py new file mode 100644 index 0000000..38df5a2 --- /dev/null +++ b/pydependence/_core/__init__.py @@ -0,0 +1,23 @@ +# ============================================================================== # +# MIT License # +# # +# Copyright (c) 2024 Nathan Juraj Michlo # +# # +# Permission is hereby granted, free of charge, to any person obtaining a copy # +# of this software and associated documentation files (the "Software"), to deal # +# in the Software without restriction, including without limitation the rights # +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # +# copies of the Software, and to permit persons to whom the Software is # +# furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in all # +# copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. # +# ============================================================================== # diff --git a/pydependence/_core/builtin.py b/pydependence/_core/builtin.py new file mode 100644 index 0000000..9452344 --- /dev/null +++ b/pydependence/_core/builtin.py @@ -0,0 +1,43 @@ +# ============================================================================== # +# MIT License # +# # +# Copyright (c) 2024 Nathan Juraj Michlo # +# # +# Permission is hereby granted, free of charge, to any person obtaining a copy # +# of this software and associated documentation files (the "Software"), to deal # +# in the Software without restriction, including without limitation the rights # +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # +# copies of the Software, and to permit persons to whom the Software is # +# furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in all # +# copies or substantial portions of the Software. 
# +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. # +# ============================================================================== # + + +from stdlib_list import stdlib_list as _stdlib_list + +# check the python version +# if sys.version_info < (3, 10): +# print("please use python >= 3.10") +# exit(1) + + +# TODO: for python 3.10 and up, can use `sys.stdlib_module_names` or `sys.builtin_module_names` +BUILTIN_MODULE_NAMES = { + # "__main__", + # *sys.builtin_module_names, + # *sys.stdlib_module_names, + *_stdlib_list(), +} + + +__all__ = ("BUILTIN_MODULE_NAMES",) diff --git a/pydependence/_core/module_data.py b/pydependence/_core/module_data.py new file mode 100644 index 0000000..bf9bb4d --- /dev/null +++ b/pydependence/_core/module_data.py @@ -0,0 +1,140 @@ +# ============================================================================== # +# MIT License # +# # +# Copyright (c) 2024 Nathan Juraj Michlo # +# # +# Permission is hereby granted, free of charge, to any person obtaining a copy # +# of this software and associated documentation files (the "Software"), to deal # +# in the Software without restriction, including without limitation the rights # +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # +# copies of the Software, and to permit persons to whom the Software is # +# furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in all # +# copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. # +# ============================================================================== # + +import pkgutil +import warnings +from importlib.machinery import FileFinder +from pathlib import Path +from typing import Iterator, NamedTuple + +from pydependence._core.utils import assert_valid_import_name, assert_valid_tag + +# ========================================================================= # +# MODULE INFO # +# ========================================================================= # + + +class ModuleMetadata(NamedTuple): + path: Path + name: str + ispkg: bool + + # tag e.g. if package `yolov5` package loaded this, the `utils` module is not unique... 
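+    #     (illustrative) two colliding `utils` modules can still be told apart
+    #     by their tagged names, e.g. `yolov5:utils` vs `mypkg:utils`, see
+    #     `tagged_name` below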
+ tag: str + + @property + def root_name(self) -> str: + return self.name.split(".")[0] + + @property + def tagged_name(self): + return f"{self.tag}:{self.name}" + + @property + def is_name_valid(self): + try: + assert_valid_import_name(self.name) + return True + except Exception: + return False + + @property + def pkgutil_module_info(self): + if not self.path.is_absolute(): + raise ValueError(f"Path must be absolute, got: {self.path}") + return pkgutil.ModuleInfo( + module_finder=FileFinder(path=str(self.path)), + name=self.name, + ispkg=self.ispkg, + ) + + @classmethod + def from_root_and_subpath( + cls, + root: Path, + subpath: Path, + tag: str, + ) -> "ModuleMetadata": + if not root.is_absolute(): + raise ValueError(f"Root path must be absolute, got: {root}") + if not subpath.is_absolute(): + subpath = root / subpath + if not subpath.name.endswith(".py"): + raise ValueError(f"Subpath must be a python file, got: {subpath}") + if not subpath.is_file(): + raise FileNotFoundError(f"Subpath must be an existing file, got: {subpath}") + tag = assert_valid_tag(tag) + rel = subpath.relative_to(root) + if rel.name == "__init__.py": + return ModuleMetadata( + path=subpath, + name=".".join(rel.parts[:-1]), + ispkg=True, + tag=tag, + ) + else: + return ModuleMetadata( + path=subpath, + name=".".join(rel.parts)[: -len(".py")], + ispkg=False, + tag=tag, + ) + + @classmethod + def yield_search_path_modules( + cls, search_path: Path, *, tag: str, valid_only: bool = True + ) -> "Iterator[ModuleMetadata]": + for p in search_path.glob("**/*.py"): + m = cls.from_root_and_subpath(search_path, subpath=p, tag=tag) + if valid_only and (not m.is_name_valid): + warnings.warn( + f"Invalid module name: {m.name}, cannot be imported or resolved, skipping: {m.path}" + ) + continue + yield m + # Only one level deep & does not work if __init__.py is not present. + # yield from pkgutil.iter_modules(path=[str(search_path)], prefix='') + + @classmethod + def yield_package_modules( + cls, package_path: Path, *, tag: str, valid_only: bool = True + ) -> "Iterator[ModuleMetadata]": + for p in package_path.glob("**/*.py"): + m = cls.from_root_and_subpath(package_path.parent, subpath=p, tag=tag) + if valid_only and (not m.is_name_valid): + warnings.warn( + f"Invalid module name: {m.name}, cannot be imported or resolved, skipping: {m.path}" + ) + continue + yield m + # Only one level deep & does not work if __init__.py is not present. 
+ # yield from pkgutil.iter_modules(path=[str(package_path)], prefix=f"{package_path.name}.") + + +# ========================================================================= # +# END # +# ========================================================================= # + + +__all__ = ("ModuleMetadata",) diff --git a/pydependence/_core/module_imports_ast.py b/pydependence/_core/module_imports_ast.py new file mode 100644 index 0000000..c6bea75 --- /dev/null +++ b/pydependence/_core/module_imports_ast.py @@ -0,0 +1,647 @@ +# ============================================================================== # +# MIT License # +# # +# Copyright (c) 2024 Nathan Juraj Michlo # +# # +# Permission is hereby granted, free of charge, to any person obtaining a copy # +# of this software and associated documentation files (the "Software"), to deal # +# in the Software without restriction, including without limitation the rights # +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # +# copies of the Software, and to permit persons to whom the Software is # +# furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in all # +# copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. # +# ============================================================================== # + + +import ast +import warnings +from collections import Counter, defaultdict +from enum import Enum +from typing import DefaultDict, Dict, List, NamedTuple, Optional, Tuple + +from pydependence._core.module_data import ModuleMetadata +from pydependence._core.utils import assert_valid_import_name, assert_valid_module_path + +# ========================================================================= # +# Allowed Nodes Leading To Imports # +# ========================================================================= # + +# PYTHON VERSION: 3.10 +# every possible way to reach an import statement, or because we also choose to +# support plugins, all expressions should be reachable. We skip the AST nodes +# that do not matter in reaching these. +# - https://docs.python.org/3/library/ast.html +_DISALLOWED_IMPORT_STATEMENT_NODES = { + # -- the following expression can appear in assignment context + # | Name(identifier id, expr_context ctx) + "Name", + # | Global(identifier* names) + # | Nonlocal(identifier* names) + # | Pass | Break | Continue + # | Constant(constant value, string? kind) + "Global", + "Nonlocal", + "Pass", + "Break", + "Continue", + "Constant", + # alias = (identifier name, identifier? asname) + # attributes (int lineno, int col_offset, int? end_lineno, int? 
end_col_offset)
+    "alias",
+    # attributes (int lineno, int col_offset, int end_lineno, int end_col_offset)
+    "attributes",
+    # type_ignore = TypeIgnore(int lineno, string tag)
+    "TypeIgnore",
+    # ================================================================================ #
+    # expr_context = Load | Store | Del
+    "Load",
+    "Store",
+    "Del",
+    # boolop = And | Or
+    "And",
+    "Or",
+    # operator = Add | Sub | Mult | MatMult | Div | Mod | Pow | LShift
+    #          | RShift | BitOr | BitXor | BitAnd | FloorDiv
+    "Add",
+    "Sub",
+    "Mult",
+    "MatMult",
+    "Div",
+    "Mod",
+    "Pow",
+    "LShift",
+    "RShift",
+    "BitOr",
+    "BitXor",
+    "BitAnd",
+    "FloorDiv",
+    # unaryop = Invert | Not | UAdd | USub
+    "Invert",
+    "Not",
+    "UAdd",
+    "USub",
+    # cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
+    "Eq",
+    "NotEq",
+    "Lt",
+    "LtE",
+    "Gt",
+    "GtE",
+    "Is",
+    "IsNot",
+    "In",
+    "NotIn",
+}
+
+# these nodes defer evaluation of any imports they contain, so imports under
+# them are treated as lazy. Module-level side effects are considered
+# unsupported behavior and never trigger an import.
+_IS_INDIRECT_NODE = {
+    "FunctionDef",
+    "AsyncFunctionDef",
+}
+
+
+# lazy import callables
+_LAZY_IMPORT_CALLABLES = {
+    "lazy_import",
+}
+
+# lazy attribute callables
+_LAZY_ATTRIBUTE_CALLABLES = {
+    "lazy_callable",
+    "lazy_inheritable",
+}
+
+# all lazy callables
+_LAZY_CALLABLES = {*_LAZY_IMPORT_CALLABLES, *_LAZY_ATTRIBUTE_CALLABLES}
+
+
+# ========================================================================= #
+# AST IMPORT PARSER                                                         #
+# ========================================================================= #
+
+
+class ImportSourceEnum(str, Enum):
+    import_ = "import_"
+    import_from = "import_from"
+    lazy_plugin = "lazy_plugin"
+    # type_check = 'type_check'  # TODO
+
+
+class LocImportInfo(NamedTuple):
+    # source, e.g. an import statement, a type check, or a lazy plugin
+    source_module_info: ModuleMetadata
+    source: ImportSourceEnum
+    # target
+    target: str
+    is_lazy: bool
+    # debug
+    lineno: int
+    col_offset: int
+    stack_type_names: Tuple[str, ...]
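+    # (illustrative) e.g. `("Module", "If", "ImportFrom")` for an import that
+    # sits inside an `if typing.TYPE_CHECKING:` block at module level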
+ # relative import + is_relative: bool + + @property + def tagged_target(self) -> str: + return f"{self.source_module_info.tag}:{self.target}" + + @property + def tagged_name_and_target(self) -> str: + return f"{self.source_module_info.tagged_name}:{self.target}" + + +class _AstImportsCollector(ast.NodeVisitor): + + def __init__(self, module_info: ModuleMetadata): + self._module_info: ModuleMetadata = module_info + self._imports: "DefaultDict[str, List[LocImportInfo]]" = defaultdict(list) + self._stack_is_lazy: "List[bool]" = [False] + self._stack_ast_kind: "List[str]" = [] + self._counter = Counter() + + # ~=~=~ WARN ~=~=~ # + + def _node_warn(self, node: ast.AST, message: str): + warnings.warn_explicit( + message=f"`{ast.unparse(node)}`: {message}", + category=SyntaxWarning, + filename=str(self._module_info.path), + lineno=node.lineno, + ) + + # ~=~=~ STACK ~=~=~ # + + def _push_current_import( + self, + node: ast.AST, + target: str, + source: ImportSourceEnum, + is_lazy: "Optional[bool]" = None, + is_relative: bool = False, + ): + import_ = LocImportInfo( + source_module_info=self._module_info, + target=target, + is_lazy=self._stack_is_lazy[-1] if (is_lazy is None) else is_lazy, + lineno=node.lineno, + col_offset=node.col_offset, + source=source, + stack_type_names=tuple(self._stack_ast_kind), + is_relative=is_relative, + ) + self._imports[target].append(import_) + + # ~=~=~ VISIT ~=~=~ # + + def visit(self, node): + kind = node.__class__.__name__ + if kind in _DISALLOWED_IMPORT_STATEMENT_NODES: + return + # push - basic interpreter + is_lazy = self._stack_is_lazy[-1] or (kind in _IS_INDIRECT_NODE) + self._stack_ast_kind.append(kind) + self._stack_is_lazy.append(is_lazy) + # continue recursion + try: + getattr(self, "visit_" + kind, self.generic_visit)(node) + finally: + # pop + self._stack_is_lazy.pop() + self._stack_ast_kind.pop() + + def generic_visit_lazy(self, node): + self._stack_is_lazy.append(True) + try: + self.generic_visit(node) + finally: + self._stack_is_lazy.pop() + + # >>> VISIT NODES <<< # + + def visit_FunctionDef(self, node): + return self.generic_visit_lazy(node) + + def visit_AsyncFunctionDef(self, node): + return self.generic_visit_lazy(node) + + def visit_Import(self, node: ast.Import): + # eg. import pkg.submodule + for alias in node.names: + self._push_current_import( + node=node, + target=alias.name, + source=ImportSourceEnum.import_, + ) + + def visit_ImportFrom(self, node: ast.ImportFrom): + assert node.level in (0, 1) # node.names: from * import name, ... + # eg: from . import ? + # eg: from .submodule import ? + # eg: from pkg.submodule import ? 
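+        # e.g. (illustrative) inside module `pkg.sub.mod` (a module, not a package):
+        #   `from .util import f` resolves to the target `pkg.sub.util`
+        #   `from . import util`  resolves to the target `pkg.sub` (module is None)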
+        is_relative = node.level != 0
+        if is_relative:
+            _parts = self._module_info.name.split(".")
+            if not self._module_info.ispkg:
+                _parts.pop()
+            if node.module:  # `from . import x` has no module part, i.e. None
+                _parts.append(node.module)
+            target = ".".join(_parts)
+            assert_valid_import_name(target)
+        else:
+            target = node.module
+        self._push_current_import(
+            node=node,
+            target=target,
+            source=ImportSourceEnum.import_from,
+            is_relative=is_relative,
+        )
+
+    # >>> CUSTOM LAZY IMPORT LIBRARY <<< #
+
+    def visit_If(self, node: ast.If):
+        """
+        check if the test name is `TYPE_CHECKING` or the attr is `typing.TYPE_CHECKING`:
+        - WE DON'T SUPPORT ANY OTHER VARIATIONS
+        - WE ASSUME THE VARIABLE IS NEVER RENAMED
+        """
+        # check if this is a type checking block
+        is_type_checking = False
+        if isinstance(node.test, ast.Attribute):
+            if (
+                isinstance(node.test.value, ast.Name)
+                and node.test.value.id == "typing"
+                and node.test.attr == "TYPE_CHECKING"
+            ):
+                is_type_checking = True
+        elif isinstance(node.test, ast.Name):
+            if node.test.id == "TYPE_CHECKING":
+                is_type_checking = True
+        # recurse, treating type-checking-only blocks as lazy
+        if not is_type_checking:
+            return self.generic_visit(node)
+        else:
+            return self.generic_visit_lazy(node)
+
+    def visit_Call(self, node: ast.Call):
+        """
+        we don't implement an interpreter, we only handle lazy imports with an
+        exact function name and a single string argument. These functions should
+        be defined by the user. We do not provide any default functions.
+        e.g. `lazy_import("os.path")`
+        we don't support attribute access or any deviation from the above,
+        e.g. `util.lazy_import("os.path.join")`
+
+        - the function names must be one of:
+            * `lazy_import("os.path")`
+            * `lazy_callable("pathlib.Path")`     # final `.Path` is stripped, import is `pathlib`
+            * `lazy_inheritable("pathlib.Path")`  # final `.Path` is stripped, import is `pathlib`
+        - WE DON'T SUPPORT ANY OTHER VARIATIONS
+        - WE ASSUME THE VARIABLE IS NEVER RENAMED
+        """
+        # - check the call is directly on a name e.g. `lazy_import(...)` and not `util.lazy_import(...)` or `util['lazy_import'](...)`
+        if not isinstance(node.func, ast.Name):
+            return
+        # - check the function name is one of the lazy import functions
+        name = node.func.id
+        if name not in _LAZY_CALLABLES:
+            return
+        # - make sure no keyword arguments are used, these invalidate the import.
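+        #   e.g. (illustrative) `lazy_import(module="os.path")` is only warned
+        #   about and skipped, it is never recorded as an import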
+ if node.keywords: + self._node_warn(node, f"should not have keyword arguments.") + return + # - make sure that the function is called with a single string argument + if not len(node.args) == 1: + self._node_warn( + node, f"called with {len(node.args)} arguments, expected: 1" + ) + return + [arg] = node.args + # - make sure that the argument is a string + if not isinstance(arg, ast.Constant) or not isinstance(arg.value, str): + self._node_warn( + node, f"called with non-string argument: `{ast.unparse(arg)}`" + ) + return + # - validate the import string + import_ = arg.value + try: + assert_valid_import_name(import_) + except Exception as e: + self._node_warn(node, f"called with invalid import path: {e}") + return + # - check if the import path includes an attribute and strip it + if name in _LAZY_ATTRIBUTE_CALLABLES: + _parts = import_.rsplit(".", maxsplit=1) + if len(_parts) < 2: + self._node_warn( + node, f"called with invalid import path to an attribute: {import_}" + ) + return + import_ = _parts[0] + # - add the import + self._push_current_import( + node=node, + target=import_, + source=ImportSourceEnum.lazy_plugin, + is_lazy=True, + ) + + # >>> PRETTY PRINT <<< # + + @classmethod + def _ast_to_dict(cls, n): + if isinstance(n, ast.AST): + return {f: cls._ast_to_dict(getattr(n, f)) for f in n._fields} + elif isinstance(n, list): + return [cls._ast_to_dict(i) for i in n] + else: + return n + + @classmethod + def load_imports_from_module_info( + cls, module_info: ModuleMetadata, *, debug: bool = False + ) -> "Dict[str, List[LocImportInfo]]": + # load the file & parse + path = assert_valid_module_path(module_info.path) + name = assert_valid_import_name(module_info.name) + with open(path) as fp: + _dat = fp.read() + _ast = ast.parse(_dat) + # collect imports + _parser = _AstImportsCollector(module_info=module_info) + _parser.visit(_ast) + # debug + if debug: + total = sum(_parser._counter.values()) + top = _parser._counter.most_common(5) + print( + f"Visited {total} nodes, top 5: {top} for module: {repr(name)} file: {module_info.path}" + ) + # done! + return _parser._imports + + +def load_imports_from_module_info( + module_info: ModuleMetadata, +) -> "Dict[str, List[LocImportInfo]]": + return _AstImportsCollector.load_imports_from_module_info(module_info) + + +# ========================================================================= # +# END # +# ========================================================================= # + + +__all__ = ( + "load_imports_from_module_info", + "LocImportInfo", + "ImportSourceEnum", +) + + +# PYTHON VERSION: 3.10 +# { +# # mod = Module(stmt* body, type_ignore* type_ignores) +# # | Interactive(stmt* body) +# # | Expression(expr body) +# # | FunctionType(expr* argtypes, expr returns) +# "Module", +# "Interactive", +# "Expression", +# # stmt = FunctionDef(identifier name, arguments args, +# # stmt* body, expr* decorator_list, expr? returns, +# # string? type_comment) +# # | AsyncFunctionDef(identifier name, arguments args, +# # stmt* body, expr* decorator_list, expr? returns, +# # string? type_comment) +# # +# "FunctionDef", +# "AsyncFunctionDef", +# # | ClassDef(identifier name, +# # expr* bases, +# # keyword* keywords, +# # stmt* body, +# # expr* decorator_list) +# # | Return(expr? value) +# "ClassDef", +# "Return", +# # | Delete(expr* targets) +# # | Assign(expr* targets, expr value, string? 
type_comment) +# # | AugAssign(expr target, operator op, expr value) +# "Delete", +# "Assign", +# "AugAssign", +# # -- 'simple' indicates that we annotate simple name without parens +# # | AnnAssign(expr target, expr annotation, expr? value, int simple) +# "AnnAssign", +# # -- use 'orelse' because else is a keyword in target languages +# # | For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment) +# # | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment) +# # | While(expr test, stmt* body, stmt* orelse) +# # | If(expr test, stmt* body, stmt* orelse) +# # | With(withitem* items, stmt* body, string? type_comment) +# # | AsyncWith(withitem* items, stmt* body, string? type_comment) +# "For", +# "AsyncFor", +# "While", +# "If", +# "With", +# "AsyncWith", +# # | Match(expr subject, match_case* cases) # stmt +# "Match", +# # | Raise(expr? exc, expr? cause) +# # | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody) +# # | TryStar(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody) +# # | Assert(expr test, expr? msg) +# "Raise", +# "Try", +# "TryStar", +# "Assert", +# # | Import(alias* names) +# # | ImportFrom(identifier? module, alias* names, int? level) +# "Import", +# "ImportFrom", +# # | Global(identifier* names) +# # | Nonlocal(identifier* names) +# # | Expr(expr value) +# # | Pass | Break | Continue +# # +# # -- col_offset is the byte offset in the utf8 string the parser uses +# # attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) +# "Global", +# "Nonlocal", +# "Expr", +# "Pass", +# "Break", +# "Continue", +# # -- BoolOp() can use left & right? +# # expr = BoolOp(boolop op, expr* values) +# # | NamedExpr(expr target, expr value) +# # | BinOp(expr left, operator op, expr right) +# # | UnaryOp(unaryop op, expr operand) +# "BoolOp", +# "NamedExpr", +# "BinOp", +# "UnaryOp", +# # | Lambda(arguments args, expr body) +# # | IfExp(expr test, expr body, expr orelse) +# # | Dict(expr* keys, expr* values) +# # | Set(expr* elts) +# # | ListComp(expr elt, comprehension* generators) +# # | SetComp(expr elt, comprehension* generators) +# # | DictComp(expr key, expr value, comprehension* generators) +# # | GeneratorExp(expr elt, comprehension* generators) +# "Lambda", +# "IfExp", +# "Dict", +# "Set", +# "ListComp", +# "SetComp", +# "DictComp", +# "GeneratorExp", +# # -- the grammar constrains where yield expressions can occur +# # | Await(expr value) +# # | Yield(expr? value) +# # | YieldFrom(expr value) +# "Await", +# "Yield", +# "YieldFrom", +# # -- need sequences for compare to distinguish between +# # -- x < 4 < 3 and (x < 4) < 3 +# # | Compare(expr left, cmpop* ops, expr* comparators) +# # | Call(expr func, expr* args, keyword* keywords) +# # | FormattedValue(expr value, int conversion, expr? format_spec) +# # | JoinedStr(expr* values) +# # | Constant(constant value, string? kind) +# "Compare", +# "Call", +# "FormattedValue", +# "JoinedStr", +# "Constant", +# # -- the following expression can appear in assignment context +# # | Attribute(expr value, identifier attr, expr_context ctx) +# # | Subscript(expr value, expr slice, expr_context ctx) +# # | Starred(expr value, expr_context ctx) +# # | Name(identifier id, expr_context ctx) +# # | List(expr* elts, expr_context ctx) +# # | Tuple(expr* elts, expr_context ctx) +# "Attribute", +# "Subscript", +# "Starred", +# "Name", +# "List", +# "Tuple", +# # -- can appear only in Subscript +# # | Slice(expr? lower, expr? upper, expr? 
step) +# "Slice", +# # -- col_offset is the byte offset in the utf8 string the parser uses +# # attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) +# # +# # expr_context = Load | Store | Del +# "Load", +# "Store", +# "Del", +# # boolop = And | Or +# "And", +# "Or", +# # operator = Add | Sub | Mult | MatMult | Div | Mod | Pow | LShift +# # | RShift | BitOr | BitXor | BitAnd | FloorDiv +# "Add", +# "Sub", +# "Mult", +# "MatMult", +# "Div", +# "Mod", +# "Pow", +# "LShift", +# "RShift", +# "BitOr", +# "BitXor", +# "BitAnd", +# "FloorDiv", +# # unaryop = Invert | Not | UAdd | USub +# "Invert", +# "Not", +# "UAdd", +# "USub", +# # cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn +# "Eq", +# "NotEq", +# "Lt", +# "LtE", +# "Gt", +# "GtE", +# "Is", +# "IsNot", +# "In", +# "NotIn", + +# # comprehension = (expr target, expr iter, expr* ifs, int is_async) +# "comprehension", +# # excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body) +# # attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) +# "ExceptHandler", +# # arguments = (arg* posonlyargs, arg* args, arg? vararg, arg* kwonlyargs, +# # expr* kw_defaults, arg? kwarg, expr* defaults) +# "arguments", +# # arg = (identifier arg, expr? annotation, string? type_comment) +# # attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) +# "arg", +# # -- keyword arguments supplied to call (NULL identifier for **kwargs) +# # keyword = (identifier? arg, expr value) +# # attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) +# "keyword", +# # -- import name with optional 'as' alias. +# # alias = (identifier name, identifier? asname) +# # attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) +# "alias", +# # withitem = (expr context_expr, expr? optional_vars) +# "withitem", +# # match_case = (pattern pattern, expr? guard, stmt* body) +# "matchcase", +# # pattern = MatchValue(expr value) +# # | MatchSingleton(constant value) +# # | MatchSequence(pattern* patterns) +# # | MatchMapping(expr* keys, pattern* patterns, identifier? rest) +# # | MatchClass(expr cls, pattern* patterns, identifier* kwd_attrs, pattern* kwd_patterns) +# "MatchValue", +# "MatchSingleton", +# "MatchSequence", +# "MatchMapping", +# "MatchClass", +# # | MatchStar(identifier? name) +# # -- The optional "rest" MatchMapping parameter handles capturing extra mapping keys +# "MatchStar", +# # | MatchAs(pattern? pattern, identifier? 
name) +# # | MatchOr(pattern* patterns) +# "MatchAs", +# "MatchOr", +# # attributes (int lineno, int col_offset, int end_lineno, int end_col_offset) +# "attributes", +# # type_ignore = TypeIgnore(int lineno, string tag) +# "TypeIgnore", +# +# # visit methods for deprecated nodes +# "ExtSlice", +# "Index", +# "Suite", +# "AugLoad", +# "AugStore", +# "Param", +# "Num", +# "Str", +# "Bytes", +# "NameConstant", +# "Ellipsis", +# } diff --git a/pydependence/_core/module_imports_loader.py b/pydependence/_core/module_imports_loader.py new file mode 100644 index 0000000..6f698e2 --- /dev/null +++ b/pydependence/_core/module_imports_loader.py @@ -0,0 +1,90 @@ +# ============================================================================== # +# MIT License # +# # +# Copyright (c) 2024 Nathan Juraj Michlo # +# # +# Permission is hereby granted, free of charge, to any person obtaining a copy # +# of this software and associated documentation files (the "Software"), to deal # +# in the Software without restriction, including without limitation the rights # +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # +# copies of the Software, and to permit persons to whom the Software is # +# furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in all # +# copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. 
# +# ============================================================================== # + + +import dataclasses +from typing import Dict, List + +from pydependence._core.module_data import ModuleMetadata +from pydependence._core.module_imports_ast import ( + LocImportInfo, + load_imports_from_module_info, +) + +# ========================================================================= # +# MODULE IMPORTS # +# ========================================================================= # + + +@dataclasses.dataclass +class ModuleImports: + module_info: ModuleMetadata + module_imports: "Dict[str, List[LocImportInfo]]" + + @classmethod + def from_module_info_and_parsed_file(cls, module_info: ModuleMetadata): + module_imports = load_imports_from_module_info(module_info=module_info) + return ModuleImports( + module_info=module_info, + module_imports=dict(module_imports), + ) + + +# ========================================================================= # +# MODULE IMPORTS LOADER # +# ========================================================================= # + + +class _ModuleImportsLoader: + + def __init__(self): + self._modules_imports: "Dict[str, ModuleImports]" = {} + + def load_module_imports(self, module_info: ModuleMetadata) -> ModuleImports: + v = self._modules_imports.get(module_info.name, None) + if v is None: + v = ModuleImports.from_module_info_and_parsed_file(module_info) + self._modules_imports[module_info.name] = v + else: + if v.module_info != module_info: + raise RuntimeError( + f"ModuleMetadata mismatch: {v.module_info} != {module_info}" + ) + return v + + +# GLOBAL INSTANCE +# TODO: can replace with disk cache +DEFAULT_MODULE_IMPORTS_LOADER = _ModuleImportsLoader() + + +# ========================================================================= # +# END # +# ========================================================================= # + + +__all__ = ( + "ModuleImports", + "DEFAULT_MODULE_IMPORTS_LOADER", +) diff --git a/pydependence/_core/modules_resolver.py b/pydependence/_core/modules_resolver.py new file mode 100644 index 0000000..60f7e9a --- /dev/null +++ b/pydependence/_core/modules_resolver.py @@ -0,0 +1,205 @@ +# ============================================================================== # +# MIT License # +# # +# Copyright (c) 2024 Nathan Juraj Michlo # +# # +# Permission is hereby granted, free of charge, to any person obtaining a copy # +# of this software and associated documentation files (the "Software"), to deal # +# in the Software without restriction, including without limitation the rights # +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # +# copies of the Software, and to permit persons to whom the Software is # +# furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in all # +# copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. 
# +# ============================================================================== # + +import warnings +from collections import defaultdict +from typing import Dict, Iterable, List, NamedTuple, Optional, Set + +import networkx as nx + +from pydependence._core.builtin import BUILTIN_MODULE_NAMES +from pydependence._core.module_data import ModuleMetadata +from pydependence._core.module_imports_ast import LocImportInfo +from pydependence._core.module_imports_loader import ( + DEFAULT_MODULE_IMPORTS_LOADER, + ModuleImports, +) +from pydependence._core.modules_scope import NODE_KEY_MODULE_INFO, ModulesScope + +# ========================================================================= # +# IMPORT GRAPH # +# ========================================================================= # + + +NODE_KEY_MODULE_IMPORTS = "module_imports" +EDGE_KEY_IMPORTS = "imports" +EDGE_KEY_ALL_LAZY = "all_lazy" + + +class _ImportsGraphNodeData(NamedTuple): + module_info: "Optional[ModuleMetadata]" + module_imports: "Optional[ModuleImports]" + + @classmethod + def from_graph_node(cls, graph: "nx.DiGraph", node: str) -> "_ImportsGraphNodeData": + return cls( + module_info=graph.nodes[node].get(NODE_KEY_MODULE_INFO, None), + module_imports=graph.nodes[node].get(NODE_KEY_MODULE_IMPORTS, None), + ) + + +class _ImportsGraphEdgeData(NamedTuple): + imports: "List[LocImportInfo]" + all_lazy: "Optional[bool]" + + @classmethod + def from_graph_edge( + cls, graph: "nx.DiGraph", src: str, dst: str + ) -> "_ImportsGraphEdgeData": + edge_data = graph.edges[src, dst] + imports = edge_data.get(EDGE_KEY_IMPORTS, []) + all_lazy = edge_data.get(EDGE_KEY_ALL_LAZY, None) + return cls(imports=imports, all_lazy=all_lazy) + + +def _construct_module_import_graph( + scope: "ModulesScope", + *, + skip_lazy: bool, +) -> "nx.DiGraph": + """ + Supports same interface as `find_modules` but edges are instead constructed + from the module imports. + + This is the direct graph where nodes are modules, and edges represent their imports. + """ + g = nx.DiGraph() + for node, node_data in scope.iter_module_items(): + if node_data.module_info is None: + warnings.warn(f"Module info not found for: {repr(node)}, skipping...") + continue + # get module info + node_imports: ModuleImports = DEFAULT_MODULE_IMPORTS_LOADER.load_module_imports( + module_info=node_data.module_info + ) + # construct nodes & edges between nodes based on imports + # - edges don't always exist, so can't just rely on them to add all nodes. + g.add_node( + node, + **{NODE_KEY_MODULE_INFO: node_data, NODE_KEY_MODULE_IMPORTS: node_imports}, + ) + for imp, imports in node_imports.module_imports.items(): + if imports: + all_lazy = all(imp.is_lazy for imp in imports) + if skip_lazy and all_lazy: + continue + g.add_edge( + node, + imp, + **{EDGE_KEY_IMPORTS: imports, EDGE_KEY_ALL_LAZY: all_lazy}, + ) + return g + + +# ========================================================================= # +# MODULE GRAPH # +# ========================================================================= # + + +ImportsDict = Dict[str, List[LocImportInfo]] +ImportsSourcesLists = Dict[str, Dict[str, LocImportInfo]] + + +def _resolve_scope_imports( + scope: "ModulesScope", + start_scope: "Optional[ModulesScope]", + skip_lazy: bool, +) -> "ImportsDict": + if start_scope is None: + start_scope = scope + if not scope.is_scope_subset(start_scope): + raise ValueError("Start scope must be a subset of the parent scope!") + + # 1. construct + # - if all imports are lazy, then we don't need to traverse them! 
(depending on mode) + # - we have to filter BEFORE the bfs otherwise we will traverse wrong nodes. + import_graph = _construct_module_import_graph(scope=scope, skip_lazy=skip_lazy) + + # 2. now resolve imports from the starting point! + # - dfs along edges to get all imports MUST do ALL edges + # - this is why we don't use `dfs_edges` which visits nodes, and may skip edges. + # - each edge contains all imports along that edge, these should + # be added to the set of imports so that we can track all imports + imports = defaultdict(set) + for src, dst in nx.edge_dfs(import_graph, source=start_scope.iter_modules()): + edge_data = _ImportsGraphEdgeData.from_graph_edge(import_graph, src, dst) + imports[dst].update(edge_data.imports) + imports = {k: list(v) for k, v in imports.items()} + + # 3. convert to datatype + return imports + + +class ScopeResolvedImports: + + def __init__( + self, scope: "ModulesScope", start_scope: "ModulesScope", imports: "ImportsDict" + ): + self.__scope = scope + self.__start_scope = start_scope + self.__imports = imports + + @classmethod + def from_scope( + cls, + scope: "ModulesScope", + start_scope: "Optional[ModulesScope]" = None, + skip_lazy: bool = False, + ): + if start_scope is None: + start_scope = scope + imports = _resolve_scope_imports( + scope=scope, start_scope=start_scope, skip_lazy=skip_lazy + ) + return cls(scope=scope, start_scope=start_scope, imports=imports) + + def _filter_keys( + self, + keys: "Iterable[str]", + *, + exclude_in_search_space: bool = True, + exclude_builtins: bool = True, + ) -> "Set[str]": + keys = set(keys) + if exclude_in_search_space: + keys -= set(self.__scope.iter_modules()) + if exclude_builtins: + keys -= BUILTIN_MODULE_NAMES + return keys + + def get_imports(self) -> ImportsDict: + return {k: list(v) for k, v in self.__imports.items()} + + def get_imports_sources(self) -> ImportsSourcesLists: + _imports = defaultdict(lambda: defaultdict(list)) + for imp, imp_sources in self.__imports.items(): + for i in imp_sources: + # TODO: should this be the tagged name instead? + _imports[imp][i.source_module_info.name].append(i) + return {k: dict(v) for k, v in _imports.items()} + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/pydependence/_core/modules_scope.py b/pydependence/_core/modules_scope.py new file mode 100644 index 0000000..71064dd --- /dev/null +++ b/pydependence/_core/modules_scope.py @@ -0,0 +1,300 @@ +# ============================================================================== # +# MIT License # +# # +# Copyright (c) 2024 Nathan Juraj Michlo # +# # +# Permission is hereby granted, free of charge, to any person obtaining a copy # +# of this software and associated documentation files (the "Software"), to deal # +# in the Software without restriction, including without limitation the rights # +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # +# copies of the Software, and to permit persons to whom the Software is # +# furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in all # +# copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. # +# ============================================================================== # +import dataclasses +import warnings +from collections import defaultdict +from enum import Enum +from pathlib import Path +from typing import ( + DefaultDict, + Dict, + Iterable, + Iterator, + List, + NamedTuple, + Optional, + Sequence, + Set, + Tuple, + Union, +) + +import networkx as nx + +from pydependence._core.builtin import BUILTIN_MODULE_NAMES +from pydependence._core.module_data import ModuleMetadata +from pydependence._core.module_imports_ast import LocImportInfo +from pydependence._core.module_imports_loader import ( + DEFAULT_MODULE_IMPORTS_LOADER, + ModuleImports, +) +from pydependence._core.utils import assert_valid_import_name + +# ========================================================================= # +# MODULE GRAPH # +# ========================================================================= # + + +NODE_KEY_MODULE_INFO = "module_info" + + +class DuplicateModuleError(RuntimeError): + pass + + +class _ModuleGraphNodeData(NamedTuple): + module_info: "Optional[ModuleMetadata]" + + @classmethod + def from_graph_node(cls, graph: "nx.DiGraph", node: str) -> "_ModuleGraphNodeData": + return cls(module_info=graph.nodes[node].get(NODE_KEY_MODULE_INFO, None)) + + +def _find_modules( + search_paths: "Optional[Sequence[Path]]", + package_paths: "Optional[Sequence[Path]]", + tag: str, + reachable_only: bool = False, +) -> "nx.DiGraph": + """ + Construct a graph of all modules found in the search paths and package paths. + Edges are added from each module to its parent package (if exists). E.g. may be + missing if an `__init__.py` file is missing. + """ + g = nx.DiGraph() + + # load all search paths + if search_paths is not None: + for search_path in search_paths: + if not search_path.is_dir(): + raise NotADirectoryError( + f"Search path must be a directory, got: {search_path}" + ) + for m in ModuleMetadata.yield_search_path_modules(search_path, tag=tag): + if m.name in g: + dat = _ModuleGraphNodeData.from_graph_node(g, m.name) + raise DuplicateModuleError( + f"Duplicate module name: {repr(m.name)}, already exists as: {dat.module_info.path}, tried to add: {m.path}, from search path: {search_path}. " + f"These modules are incompatible and cannot be loaded together!" + ) + g.add_node(m.name, **{NODE_KEY_MODULE_INFO: m}) + + # load all package paths + if package_paths is not None: + for package_path in package_paths: + if not package_path.exists(): + raise FileNotFoundError(f"Package path does not exist: {package_path}") + for m in ModuleMetadata.yield_package_modules(package_path, tag=tag): + if m.name in g: + dat = _ModuleGraphNodeData.from_graph_node(g, m.name) + raise DuplicateModuleError( + f"Duplicate module name: {repr(m.name)}, already exists as: {dat.module_info.path}, tried to add: {m.path}, from package path: {package_path}. " + f"These modules are incompatible and cannot be loaded together!" + ) + g.add_node(m.name, **{NODE_KEY_MODULE_INFO: m}) + + # add all connections to parent packages + for node in g.nodes: + parts = node.split(".") + if len(parts) > 1: + parent = ".".join(parts[:-1]) + if g.has_node(parent): + g.add_edge(parent, node) + + # make sure there are no empty nodes, this is a bug! 
+ if "" in g.nodes: + raise RuntimeError(f"[BUG] Empty module name found in graph: {g}") + + # reverse traverse from each node to the root to figure out which nodes are reachable, then filter them out. + if reachable_only: + reverse = g.reverse() + for node in list(g.nodes): + root = node.split(".")[0] + if not nx.has_path(reverse, node, root): + g.remove_node(node) + + # * DiGraph [ import_path -> Node(module_info) ] + return g + + +# ========================================================================= # +# NAMED MODULE NAMESPACE # +# ========================================================================= # + + +class RestrictMode(str, Enum): + EXACT = "EXACT" + CHILDREN = "CHILDREN" + ROOT_CHILDREN = "ROOT_CHILDREN" + + +class RestrictOp(str, Enum): + LIMIT = "LIMIT" # only include these + EXCLUDE = "EXCLUDE" # exclude these + + +class ModulesScope: + + def __init__(self): + self._module_graph = nx.DiGraph() + self.__import_graph_strict = None + self.__import_graph_lazy = None + + # ~=~=~ ADD MODULES ~=~=~ # + + def _merge_module_graph(self, graph: "nx.DiGraph") -> "ModulesScope": + # 1. get all nodes that are in both search spaces + nodes = set(self._module_graph.nodes) & set(graph.nodes) + if nodes: + raise DuplicateModuleError(f"Duplicate module names found: {sorted(nodes)}") + # 2. add all nodes from the other search space + self._module_graph = nx.compose(self._module_graph, graph) + self.__import_graph_strict = None + self.__import_graph_lazy = None + return self + + def add_modules_from_scope(self, search_space: "ModulesScope") -> "ModulesScope": + return self._merge_module_graph(graph=search_space._module_graph) + + def add_modules_from_raw_imports( + self, imports: List[str], tag: str + ) -> "ModulesScope": + g = nx.DiGraph() + for imp in imports: + g.add_node(imp) + return self._merge_module_graph(graph=g) + + def add_modules_from_search_path( + self, search_path: Path, tag: Optional[str] = None + ) -> "ModulesScope": + if tag is None: + tag = search_path.name + warnings.warn( + f"No tag provided for search path: {repr(search_path)}, using path name as tag: {repr(tag)}" + ) + graph = _find_modules(search_paths=[search_path], package_paths=None, tag=tag) + return self._merge_module_graph(graph=graph) + + def add_modules_from_package_path( + self, package_path: Path, tag: Optional[str] = None + ) -> "ModulesScope": + if tag is None: + tag = package_path.parent.name + warnings.warn( + f"No tag provided for package path: {repr(package_path)}, using parent name as tag: {repr(tag)}" + ) + graph = _find_modules(search_paths=None, package_paths=[package_path], tag=tag) + return self._merge_module_graph(graph=graph) + + # ~=~=~ MODULE INFO ~=~=~ # + + def iter_modules(self) -> "Iterator[str]": + yield from self._module_graph.nodes + + def iter_module_items(self) -> "Iterator[Tuple[str, _ModuleGraphNodeData]]": + for node in self._module_graph.nodes: + yield node, _ModuleGraphNodeData.from_graph_node(self._module_graph, node) + + def has_module(self, module_name: str) -> bool: + return module_name in self._module_graph + + def get_module_data(self, module_name: str) -> _ModuleGraphNodeData: + return _ModuleGraphNodeData.from_graph_node(self._module_graph, module_name) + + # ~=~=~ SCOPE OPS ~=~=~ # + + def is_scope_parent_set(self, other: "ModulesScope") -> bool: + return self._module_graph.nodes <= other._module_graph.nodes + + def is_scope_subset(self, other: "ModulesScope") -> bool: + return self._module_graph.nodes >= other._module_graph.nodes + + def is_scope_conflicts(self, 
other: "ModulesScope") -> bool:
+        return bool(self._module_graph.nodes & other._module_graph.nodes)
+
+    def get_scope_conflicts(self, other: "ModulesScope") -> Set[str]:
+        return set(self._module_graph.nodes & other._module_graph.nodes)
+
+    # ~=~=~ FILTER MODULES ~=~=~ #
+
+    def get_restricted_scope(
+        self,
+        imports: Iterable[str],
+        *,
+        mode: RestrictMode = RestrictMode.CHILDREN,
+        op: RestrictOp = RestrictOp.LIMIT,
+    ) -> "ModulesScope":
+        assert not isinstance(imports, str)
+        imports = set(map(assert_valid_import_name, imports))
+        # copy the graph
+        s = ModulesScope()
+        s._module_graph = self._module_graph.copy()
+
+        # allowed, as tuples of import path parts (only the root for ROOT_CHILDREN)
+        if mode == RestrictMode.ROOT_CHILDREN:
+            allowed = {(i.split(".")[0],) for i in imports}
+        elif mode in (RestrictMode.EXACT, RestrictMode.CHILDREN):
+            allowed = {tuple(i.split(".")) for i in imports}
+        else:
+            raise ValueError(f"Invalid mode: {mode}")
+
+        # filter the graph
+        for node in list(s._module_graph.nodes):
+            node_parts = tuple(node.split("."))
+            # get the limited set of nodes
+            if mode == RestrictMode.EXACT:
+                remove = node_parts not in allowed
+            elif mode == RestrictMode.ROOT_CHILDREN:
+                remove = node_parts[:1] not in allowed
+            elif mode == RestrictMode.CHILDREN:
+                remove = not any(
+                    node_parts[: i + 1] in allowed for i in range(len(node_parts))
+                )
+            else:
+                raise ValueError(f"Invalid mode: {mode}")
+            # apply the operation
+            if op == RestrictOp.LIMIT:
+                pass  # `remove` is already correct
+            elif op == RestrictOp.EXCLUDE:
+                remove = not remove
+            else:
+                raise ValueError(f"Invalid operation: {op}")
+            # remove the node
+            if remove:
+                s._module_graph.remove_node(node)
+        # done!
+        return s
+
+
+# ========================================================================= #
+# END                                                                       #
+# ========================================================================= #
+
+
+__all__ = (
+    "DuplicateModuleError",
+    "ModulesScope",
+    "RestrictMode",
+    "RestrictOp",
+)
diff --git a/pydependence/_core/requirements_gen.py b/pydependence/_core/requirements_gen.py
new file mode 100644
index 0000000..d5e2c16
--- /dev/null
+++ b/pydependence/_core/requirements_gen.py
@@ -0,0 +1,267 @@
+# ============================================================================== #
+# MIT License                                                                    #
+#                                                                                #
+# Copyright (c) 2024 Nathan Juraj Michlo                                         #
+#                                                                                #
+# Permission is hereby granted, free of charge, to any person obtaining a copy   #
+# of this software and associated documentation files (the "Software"), to deal  #
+# in the Software without restriction, including without limitation the rights   #
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      #
+# copies of the Software, and to permit persons to whom the Software is          #
+# furnished to do so, subject to the following conditions:                       #
+#                                                                                #
+# The above copyright notice and this permission notice shall be included in all #
+# copies or substantial portions of the Software.                                #
+#                                                                                #
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     #
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       #
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    #
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         #
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  #
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  #
+# SOFTWARE.
# +# ============================================================================== # + +import dataclasses +from collections import defaultdict +from enum import Enum +from typing import Dict, List, Optional + +from pydependence._core.builtin import BUILTIN_MODULE_NAMES +from pydependence._core.module_imports_ast import LocImportInfo +from pydependence._core.modules_resolver import ScopeResolvedImports +from pydependence._core.modules_scope import ModulesScope +from pydependence._core.requirements_map import ( + NoConfiguredRequirementMappingError, + RequirementsMapper, +) + +# ========================================================================= # +# REQUIREMENTS WRITER # +# ========================================================================= # + + +class WriteMode(str, Enum): + include = "include" + comment = "comment" + exclude = "exclude" + + @property + def priority(self) -> int: + return _WRITE_PRIORITIES[self] + + +_WRITE_PRIORITIES = { + WriteMode.include: 0, + WriteMode.comment: 1, + WriteMode.exclude: 2, +} + + +class WriteLevel(str, Enum): + start_scope = "start_scope" + parent_scope = "parent_scope" + external = "external" + none = "none" + + @property + def level(self) -> int: + return _WRITE_LEVELS[self] + + +_WRITE_LEVELS = { + WriteLevel.start_scope: 0, + WriteLevel.parent_scope: 1, + WriteLevel.external: 2, + WriteLevel.none: 3, +} + + +@dataclasses.dataclass +class WriteRules: + write_mode_is_builtin: WriteMode + write_mode_start_scope: WriteMode + # write_level_exclude: WriteLevel + # write_level_comment: WriteLevel + write_mode_is_lazy: WriteMode + + +# ========================================================================= # +# REQUIREMENTS GENERATOR # +# ========================================================================= # + + +@dataclasses.dataclass +class WriteRequirementSourceModule: + source_module: str + source_module_imports: List[LocImportInfo] + target_imports: List[str] + + # debugging + all_lazy: bool + any_target_in_parent_scope: bool + any_target_in_start_scope: bool + any_source_in_parent_scope: bool + any_source_in_start_scope: bool + is_builtin: bool + + # actual write mode + write_mode: WriteMode + + def apply_write_rules(self, rules: WriteRules): + mode = self.write_mode + if self.is_builtin: + mode = max(mode, rules.write_mode_is_builtin, key=lambda x: x.priority) + if self.all_lazy: + mode = max(mode, rules.write_mode_is_lazy, key=lambda x: x.priority) + self.write_mode = mode + return self + + +@dataclasses.dataclass +class WriteRequirement: + requirement: str + source_modules: List[WriteRequirementSourceModule] + + # debugging + is_builtin: bool + + # actual write mode + write_mode: WriteMode + + @property + def all_lazy(self): + return all(x.all_lazy for x in self.source_modules) + + @property + def any_target_in_parent_scope(self) -> bool: + return any(x.any_target_in_parent_scope for x in self.source_modules) + + @property + def any_target_in_start_scope(self) -> bool: + return any(x.any_target_in_start_scope for x in self.source_modules) + + @property + def any_source_in_parent_scope(self) -> bool: + return any(x.any_source_in_parent_scope for x in self.source_modules) + + @property + def any_source_in_start_scope(self) -> bool: + return any(x.any_source_in_start_scope for x in self.source_modules) + + def apply_write_rules(self, rules: WriteRules): + mode = self.write_mode + if self.is_builtin: + mode = max(mode, rules.write_mode_is_builtin, key=lambda x: x.priority) + if self.any_target_in_start_scope: + mode = max(mode, 
rules.write_mode_start_scope, key=lambda x: x.priority)
+        if self.all_lazy:
+            mode = max(mode, rules.write_mode_is_lazy, key=lambda x: x.priority)
+        self.write_mode = mode
+        return self
+
+
+def generate_output_requirements(
+    scope: ModulesScope,
+    start_scope: Optional[ModulesScope],
+    requirements_mapper: RequirementsMapper,
+    requirements_env: str,
+    write_rules: WriteRules,
+) -> "List[WriteRequirement]":
+    if start_scope is None:
+        start_scope = scope
+
+    # resolve
+    resolved_explicit = ScopeResolvedImports.from_scope(
+        scope=scope,
+        start_scope=start_scope,
+        skip_lazy=True,
+    )
+    resolved_all = ScopeResolvedImports.from_scope(
+        scope=scope,
+        start_scope=start_scope,
+        skip_lazy=False,
+    )
+
+    # 1. get imports
+    # - assert the explicit (non-lazy) imports are a subset of all imports
+    # {module: {source: [import_info, ...], ...}, ...}
+    imports_explicit = resolved_explicit.get_imports_sources()
+    imports_all = resolved_all.get_imports_sources()
+    extra = set(imports_explicit.keys()) - set(imports_all.keys())
+    if extra:
+        raise RuntimeError(
+            f"imports_explicit must be a subset of imports_all, got extra imports: {extra}"
+        )
+
+    # 2. collect imports under requirements, BUT don't map builtins
+    # {requirement: {source: [import_info, ...], ...}, ...}
+    requirements_all: "Dict[str, Dict[str, List[LocImportInfo]]]" = defaultdict(
+        lambda: defaultdict(list)
+    )
+    errors = []
+    for imp in sorted(imports_all.keys()):
+        # - map import to requirement
+        try:
+            if imp in BUILTIN_MODULE_NAMES:
+                requirement = imp
+            else:
+                requirement = requirements_mapper.map_import_to_requirement(
+                    imp,
+                    requirements_env=requirements_env,
+                )
+            # - add to requirements
+            for source, import_infos in imports_all[imp].items():
+                assert {t.target for t in import_infos} == {imp}
+                requirements_all[requirement][source].extend(import_infos)
+        except NoConfiguredRequirementMappingError as e:
+            errors.append(e)
+    if errors:
+        err_imports = {imp for e in errors for imp in e.imports}
+        err_roots = {imp.split(".")[0] for imp in err_imports}
+        raise NoConfiguredRequirementMappingError(
+            msg=f"could not find mapped requirements for roots: {sorted(err_roots)}, or full imports: {sorted(err_imports)}",
+            imports=err_imports,
+        )
+
+    # 3.
generate write imports & apply write rules + write_reqs: "List[WriteRequirement]" = [] + for requirement in sorted(requirements_all.keys()): + # {source: [import_info, ...], ...} + requirement_sources = requirements_all[requirement] + + # - generate requirement sources + source_modules = [] + for source in sorted(requirement_sources.keys()): + target_imports = sorted({t.target for t in requirement_sources[source]}) + write_src = WriteRequirementSourceModule( + source_module=source, + source_module_imports=requirement_sources[source], + target_imports=target_imports, + all_lazy=all(v.is_lazy for v in requirement_sources[source]), + any_source_in_parent_scope=scope.has_module(source), + any_source_in_start_scope=start_scope.has_module(source), + any_target_in_parent_scope=any( + scope.has_module(t) for t in target_imports + ), + any_target_in_start_scope=any( + start_scope.has_module(t) for t in target_imports + ), + is_builtin=source in BUILTIN_MODULE_NAMES, + write_mode=WriteMode.include, + ) + write_src.apply_write_rules(write_rules) + source_modules.append(write_src) + + # - generate requirement + write_req = WriteRequirement( + requirement=requirement, + source_modules=source_modules, + is_builtin=requirement in BUILTIN_MODULE_NAMES, + write_mode=WriteMode.include, + ) + write_req.apply_write_rules(write_rules) + write_reqs.append(write_req) + + # done! + return write_reqs diff --git a/pydependence/_core/requirements_map.py b/pydependence/_core/requirements_map.py new file mode 100644 index 0000000..6c9f308 --- /dev/null +++ b/pydependence/_core/requirements_map.py @@ -0,0 +1,152 @@ +# ============================================================================== # +# MIT License # +# # +# Copyright (c) 2024 Nathan Juraj Michlo # +# # +# Permission is hereby granted, free of charge, to any person obtaining a copy # +# of this software and associated documentation files (the "Software"), to deal # +# in the Software without restriction, including without limitation the rights # +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # +# copies of the Software, and to permit persons to whom the Software is # +# furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in all # +# copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. 
#
+# ============================================================================== #
+
+import functools
+import warnings
+from typing import Dict, List, Set, Tuple
+
+from pydependence._core.modules_scope import ModulesScope
+
+# The version matching env
+DEFAULT_REQUIREMENTS_ENV = "default"
+
+
+# ========================================================================= #
+# IMPORT MATCHER                                                            #
+# ========================================================================= #
+
+
+class ImportMatcherBase:
+
+    def match(self, import_: str) -> bool:
+        raise NotImplementedError
+
+
+class ImportMatcherScope(ImportMatcherBase):
+
+    def __init__(self, scope: ModulesScope):
+        self.scope = scope
+
+    def match(self, import_: str) -> bool:
+        return self.scope.has_module(import_)
+
+
+class ImportMatcherGlob(ImportMatcherBase):
+
+    def __init__(self, import_glob: str):
+        (*parts, last) = import_glob.split(".")
+        # check all parts are identifiers, OR, at least one identifier with the last part being a glob
+        if parts:
+            if not all(str.isidentifier(x) for x in parts):
+                raise ValueError(
+                    f"parts of import glob {repr(import_glob)} are not valid identifiers"
+                )
+            if not (str.isidentifier(last) or last == "*"):
+                raise ValueError(
+                    f"last part of import glob {repr(import_glob)} is not a valid identifier or '*'"
+                )
+        else:
+            if not str.isidentifier(last):
+                raise ValueError(
+                    f"last part of import glob {repr(import_glob)} is not a valid identifier"
+                )
+        # create glob
+        if last == "*":
+            self._parts = parts
+            self._wildcard = True
+        else:
+            self._parts = (*parts, last)
+            self._wildcard = False
+        # NOTE: join ALL the parts (including `last`), otherwise exact
+        #       (non-wildcard) matches compare against a truncated glob
+        self._base = ".".join(self._parts)
+
+    def match(self, import_: str) -> bool:
+        if not self._wildcard:
+            return import_ == self._base
+        else:
+            parts = import_.split(".")
+            return self._parts == parts[: len(self._parts)]
+
+
+# ========================================================================= #
+# REQUIREMENTS MAPPER                                                       #
+# ========================================================================= #
+
+
+class NoConfiguredRequirementMappingError(ValueError):
+
+    def __init__(self, msg: str, imports: Set[str]):
+        self.msg = msg
+        self.imports = imports
+        super().__init__(msg)
+
+
+class RequirementsMapper:
+
+    def __init__(
+        self,
+        *,
+        env_matchers: "Dict[str, List[Tuple[str, ImportMatcherBase]]]",
+        strict: bool = False,
+    ):
+        # env -> [(requirement, import matcher), ...]
+        # * we use a list to maintain order, and then linear search. This is because
+        #   we could have multiple imports that match to the same requirement.
+        #   we could potentially be stricter about this in future...
+        self._env_matchers = env_matchers
+        self._strict = strict
+
+    @functools.lru_cache(maxsize=256)
+    def map_import_to_requirement(self, import_: str, requirements_env: str) -> str:
+        # 1. take the specific env
+        if requirements_env != DEFAULT_REQUIREMENTS_ENV:
+            if requirements_env not in self._env_matchers:
+                raise ValueError(
+                    f"env: {repr(requirements_env)} has not been defined for a requirement."
+                )
+            for requirement, matcher in self._env_matchers[requirements_env]:
+                if matcher.match(import_):
+                    return requirement
+        # 2. take the default env
+        for requirement, matcher in self._env_matchers.get(
+            DEFAULT_REQUIREMENTS_ENV, []
+        ):
+            if matcher.match(import_):
+                return requirement
+        # 3. return the root
+        if self._strict:
+            raise NoConfiguredRequirementMappingError(
+                msg=f"could not find a mapped requirement for import: {repr(import_)}, define a scope or glob matcher for this import, or disable strict mode!",
+                imports={import_},
+            )
+        else:
+            root = import_.split(".")[0]
+            warnings.warn(
+                f"could not find a matching requirement for import: {repr(import_)}, returning the import root: {repr(root)} as the requirement"
+            )
+            return root
+
+
+# ========================================================================= #
+# END                                                                       #
+# ========================================================================= #
 diff --git a/pydependence/_core/requirements_writers.py b/pydependence/_core/requirements_writers.py
new file mode 100644
index 0000000..160a926
--- /dev/null
+++ b/pydependence/_core/requirements_writers.py
@@ -0,0 +1,143 @@
+# ============================================================================== #
+# MIT License                                                                    #
+#                                                                                #
+# Copyright (c) 2024 Nathan Juraj Michlo                                         #
+#                                                                                #
+# Permission is hereby granted, free of charge, to any person obtaining a copy   #
+# of this software and associated documentation files (the "Software"), to deal  #
+# in the Software without restriction, including without limitation the rights   #
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      #
+# copies of the Software, and to permit persons to whom the Software is          #
+# furnished to do so, subject to the following conditions:                       #
+#                                                                                #
+# The above copyright notice and this permission notice shall be included in all #
+# copies or substantial portions of the Software.                                #
+#                                                                                #
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     #
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       #
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    #
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         #
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  #
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  #
+# SOFTWARE.                                                                      #
+# ============================================================================== #
+
+from pathlib import Path
+from typing import List, Union
+
+from pydependence._core.requirements_gen import (
+    WriteMode,
+    WriteRequirement,
+    WriteRequirementSourceModule,
+)
+from pydependence._core.utils import is_absolute_path, load_toml_document
+
+# ========================================================================= #
+# WRITER - TOML                                                             #
+# ========================================================================= #
+
+
+def read_and_dump_toml_imports(
+    *,
+    file: "Union[str, Path]",
+    keys: "List[str]",
+    requirements: "List[WriteRequirement]",
+):
+    import tomlkit.items
+
+    # load file
+    file = Path(file)
+    assert is_absolute_path(file), f"file must be an absolute path, got: {file}"
+    toml = load_toml_document(file)
+
+    # add sections if missing
+    section = toml
+    for i, k in enumerate(keys):
+        if i < len(keys) - 1:
+            section = section.setdefault(k, {})
+            assert isinstance(section, tomlkit.items.Table)
+        else:
+            section = section.setdefault(k, [])
+            assert isinstance(section, tomlkit.items.Array)
+
+    # line writer
+    def add_line(
+        *line: str,
+        indents: int = 1,
+        comment: str = "",
+        all_comment: bool = False,
+        comment_after: bool = False,
+    ):
+        line = " ".join(line)
+        if all_comment:
+            items = [comment, f'"{line}"']
+            if comment_after:
+                items = items[::-1]
+            items = [x for x in items if x]
+            comment = " ".join(items) if items else None
+            section.add_line(comment=comment, indent=" " * (4 * indents))
+        else:
+            line = [line] if line else []
+            section.add_line(*line, comment=comment or None, indent=" " * (4 * indents))
+
+    def str_modes_active(
+        item: "Union[WriteRequirement, WriteRequirementSourceModule]", prefix: str = ""
+    ):
+        active = []
+        if item.all_lazy:
+            active.append("L")
+        if item.any_source_in_start_scope:
+            pass
+        elif item.any_source_in_parent_scope:
+            active.append("e")
+        else:
+            active.append("E")
+        if item.is_builtin:
+            active.append("B")
+        string = f"[{''.join(active)}]" if active else ""
+        if prefix and string:
+            return f"{prefix} {string}"
+        else:
+            return f"{prefix}{string}"
+
+    # 1.
write imports as strings into array with new lines + section.clear() + if requirements: + add_line(comment="[AUTOGEN] by pydependence **DO NOT EDIT** [AUTOGEN]") + + # if import source is in the base, but not the start, then collapse it to the root + for req in requirements: + # skip if needed + if req.write_mode == WriteMode.exclude: + continue + # * write the requirement + add_line( + req.requirement, + comment=str_modes_active(req), + all_comment=req.write_mode == WriteMode.comment, + comment_after=True, + ) + # * write the sources + for src in req.source_modules: + if src.write_mode == WriteMode.exclude: + continue + # - write the source + add_line( + src.source_module, + comment=str_modes_active(src, prefix=f"←"), + all_comment=True, + indents=2, + ) + + # add a new line + if requirements: + section.add_line(indent="") + + # write + with open(file, "w") as fp: + tomlkit.dump(toml, fp) + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/pydependence/_core/utils.py b/pydependence/_core/utils.py new file mode 100644 index 0000000..67ea1b0 --- /dev/null +++ b/pydependence/_core/utils.py @@ -0,0 +1,129 @@ +# ============================================================================== # +# MIT License # +# # +# Copyright (c) 2024 Nathan Juraj Michlo # +# # +# Permission is hereby granted, free of charge, to any person obtaining a copy # +# of this software and associated documentation files (the "Software"), to deal # +# in the Software without restriction, including without limitation the rights # +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # +# copies of the Software, and to permit persons to whom the Software is # +# furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in all # +# copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. 
# +# ============================================================================== # +import typing +from pathlib import Path +from typing import Union + +# never actually imported at runtime, but used for type hints in IDEs +if typing.TYPE_CHECKING: + from tomlkit import TOMLDocument + + +# ========================================================================= # +# AST IMPORT PARSER # +# ========================================================================= # + + +def assert_valid_tag(tag: str) -> str: + if not tag: + raise ValueError(f"Tag must not be empty: {tag}") + if not tag.replace("-", "_").isidentifier(): + raise NameError(f"Tag must be a valid identifier: {tag}") + return tag + + +def assert_valid_module_path(path: "Union[Path, str]") -> Path: + path = Path(path) + if not path.is_absolute(): + raise ValueError(f"Path must be absolute: {path}") + if not path.exists(): + raise FileNotFoundError(f"File does not exist: {path}") + if not path.is_file(): + raise RuntimeError(f"Path is not a file: {path}") + return path + + +def assert_valid_import_name(import_: str) -> str: + parts = import_.split(".") + if not parts: + raise ValueError( + f"import path must have at least one part for: {repr(import_)}" + ) + for part in parts: + if not part.isidentifier(): + raise NameError( + f"import part: {repr(part)} is not a valid identifier, obtained from: {repr(import_)}" + ) + return import_ + + +# ========================================================================= # +# PATH HELPER # +# ========================================================================= # + + +def is_relative_path(path: Union[str, Path]) -> bool: + # '..' should be considered a relative path + # '.' should be considered a relative path + # `not is_absolute` is not enough! 
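+    #   e.g. on Windows a drive-relative path like "C:foo" is NOT absolute
+    #   (PureWindowsPath("C:foo").is_absolute() is False), yet it is anchored
+    #   to a drive, so it is not considered relative to "." either.
+    # NOTE: Path.is_relative_to was only added in Python 3.9.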
+    return Path(path).is_relative_to(Path("."))
+
+
+def is_absolute_path(path: Union[str, Path]) -> bool:
+    return not is_relative_path(path)
+
+
+def apply_root_to_path_str(root: Union[str, Path], path: Union[str, Path]) -> str:
+    if is_relative_path(root):
+        raise ValueError(f"root must be an absolute path, got: {root}")
+    if is_absolute_path(path):
+        path = Path(path)
+    else:
+        path = Path(root) / path
+    return str(path.resolve())
+
+
+# ========================================================================= #
+# LOAD                                                                      #
+# ========================================================================= #
+
+
+def load_toml_document(path: Union[str, Path]) -> "TOMLDocument":
+    import tomlkit
+    from tomlkit import TOMLDocument
+
+    path = Path(path)
+    if not path.name.endswith(".toml"):
+        raise ValueError(f"path is not a .toml file: {path}")
+    if not path.is_file():
+        raise FileNotFoundError(f"path is not a file: {path}")
+    with open(path) as fp:
+        toml = tomlkit.load(fp)
+    assert isinstance(toml, TOMLDocument), f"got {type(toml)}, not TOMLDocument"
+    return toml
+
+
+# ========================================================================= #
+# END                                                                       #
+# ========================================================================= #
+
+
+__all__ = (
+    "assert_valid_module_path",
+    "assert_valid_import_name",
+    "is_relative_path",
+    "is_absolute_path",
+    "apply_root_to_path_str",
+    "load_toml_document",
+)
 diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..2bd9b46
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,270 @@
+# =========================================== #
+# BUILD SYSTEM                                #
+# =========================================== #
+
+[build-system]
+requires = ["setuptools>=61", "setuptools_scm[toml]>=6.2"]
+build-backend = "setuptools.build_meta"
+
+# =========================================== #
+# TOOLS                                       #
+# =========================================== #
+
+# packaging tools
+[tool.setuptools]
+packages = ["pydependence"]
+
+# obtain the version from git
+# https://github.com/pypa/setuptools_scm
+#[tool.setuptools_scm]
+#fallback_version = "0.0.0"
+
+# automated tests
+# NOTE: pytest only reads pyproject.toml config from [tool.pytest.ini_options]
+[tool.pytest.ini_options]
+minversion = "6.0"
+testpaths = ["tests", "pydependence"]
+python_files = ["test.py", "test_*.py", "__test__*.py"]
+
+# - - - - - - - - - - - - - - - - - - - - - - #
+# BOOTSTRAPPED PYDEPENDENCE SETTINGS          #
+# - - - - - - - - - - - - - - - - - - - - - - #
+
+# PYDEPENDENCE:
+# =============
+
+# pydependence is an AST imports analysis tool that is used to discover the imports of a package and generate
+# a dependency graph and requirements/pyproject sections.
+
+# pydependence is NOT a package manager or a dependency resolver.
+# This is left to the tool of your choice, e.g. pip, poetry, pip-compile, etc.
+
+
+# VERSION MAPPINGS:
+# =================
+
+# Versions are used to specify the version of a package that should be used when generating output requirements.
+# - If a version is not specified, an error will be raised.
+
+# Versions are also used to construct mappings between package names and import names.
+# - e.g. Pillow is imported as PIL, so the version mapping is {package="pillow", version="*", import="PIL"}
+
+# Resolving optional dependencies?
+
+# SCOPE:
+# ======
+
+# A scope is a logical collection of packages.
+# It is a way to group packages together for the purpose of dependency resolution.
+# - NOTE: there cannot be conflicting module imports within a scope.
+
+# Scopes can inherit from other scopes.
+# Scopes can have filters applied to them, include & exclude.
+# Scopes must have unique names.
+
+# The order of constructing a single scope is important.
+# 1. parents, search_paths, pkg_paths > [packages:NotImplemented]
+#    - parents: inherit all modules from the specified scopes
+#    - search_paths: search for packages inside the specified paths (like PYTHONPATH)
+#    - pkg_paths: add the packages at the specified paths
+# 2. limit, include, exclude
+#    - limit: limit the search space to children of the specified packages
+#    - include: include packages that match the specified patterns
+#    - exclude: exclude packages that match the specified patterns
+
+# The order of evaluation when constructing multiple scopes is important, and can
+# be used to create complex dependency resolution strategies.
+# - all scopes are constructed in order of definition
+
+# SUBSCOPES:
+# ==========
+
+# A subscope is simply an alias for constructing a new scope, where:
+# - the parent scope is the current scope
+# - a filter is applied to limit the packages
+
+# e.g.
+# ```
+# [[tool.pydependence.scopes]]
+# name = "my_pkg"
+# pkg_paths = ["my_pkg"]
+# subscopes = {mySubPkg="my_pkg.my_sub_pkg"}
+# ```
+# is the same as:
+# ```
+# [[tool.pydependence.scopes]]
+# name = "my_pkg"
+# pkg_paths = ["my_pkg"]
+#
+# [[tool.pydependence.scopes]]
+# name = "mySubPkg"
+# parents = ["my_pkg"]
+# limit = ["my_pkg.my_sub_pkg"]
+# ```
+
+# why?
+# - This simplifies syntax for the common pattern where you want to resolve optional dependencies
+#   across an entire package, but only want to traverse starting from the subscope.
+
+# RESOLVERS:
+# ==========
+
+# Resolvers are used to specify how to resolve dependencies, and where to output the results.
+
+# options:
+# * scope:
+#   - is used to determine the search space for the resolver.
+# * start_scope:
+#   - is used to determine the starting point for the resolver, i.e. BFS across all imports occurs from this point.
+# * output_mode:
+#   - is used to determine where to output the results.
+#   - valid options are: `dependencies`, `optional-dependencies` TODO: add more like `requirements`
+# * output_file:
+#   - is used to specify the file to output the results to, by default this is the current `pyproject.toml` file.
+#     this usually only needs to be specified when outputting to a different file like `requirements.txt`
+# * mode:
+#   - is used to determine the mode of the resolver and if it should traverse all imports or only lazy/eager imports.
+#   - valid options are: `all`, `lazy_only`, `eager_only`
+# * include:
+#   - TODO
+# * exclude:
+#   - TODO
+
+# Note: We can have multiple resolvers to construct different sets of outputs. For example, if you have a library
+# with core dependencies and optional dependencies, you can construct a resolver for each, and limit the results
+# for the optional dependencies to only output the optional dependencies for that resolver.
+
+# general settings / overrides
+[tool.pydependence]
+
+# default, don't need to specify
+# - relative to the parent of this file
+default_root = ".."
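+# - i.e. ".." presumably makes relative paths resolve one level above the directory
+#   containing this file, which is why the scope below refers to the package as
+#   pkg_paths = "pydependence/pydependence" (repo directory, then package directory)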
+ +# default, don't need to specify +default_write_rules = {builtin = "exclude", start_scope = "exclude", lazy = "comment"} + +# default, don't need to specify +strict_requirements_map = true + +# global allowed package versions & version to import map +versions = [ + "networkx", + "pydantic>=2.0.0", + {requirement="pydantic<2.0.0,>=1.0.0", env='legacy'}, + "stdlib_list", + "tomlkit", + "packaging", + {requirement="pydependence", scope="pydependence"}, +] + +resolvers = [ + # output_name is optional, by default it is the same as the scope name, or sub scope + {output_mode='dependencies', scope='pydependence', start_scope='pydependence-core'}, + {output_mode='optional-dependencies', scope='pydependence', output_name='example_core_no_lazy'}, + {output_mode='optional-dependencies', scope='pydependence', env='legacy', output_name='example_legacy', write_rules = {start_scope = "exclude", lazy = "include"}}, +] + +# collections of packages and dependencies that will then be resolved. +[[tool.pydependence.scopes]] +name = "pydependence" +pkg_paths = "pydependence/pydependence" + +[[tool.pydependence.scopes]] +name = "pydependence-core" +parents = ["pydependence"] +limit = "pydependence._core" + + +# =========================================== # +# PROJECT # +# =========================================== # + +[project] +name = "pydependence" +dynamic = ["version"] +description = "Python local package dependency discovery, resolution and requirements generation." +urls = {repository = "https://github.com/nmichlo/pydependence"} +authors = [{name = "Nathan Michlo", email = "nathanjmichlo@gmail.com"}] +readme = "README.md" +license = {file="LICENSE"} +classifiers = [ + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", +] + +# - - - - - - - - - - - - - - - - - - - - - - # +# [AUTOGEN] PROJECT: DEPS [AUTOGEN] # +# !!! do not modify this section manually !!! # +# !!! do not modify this section manually !!! # +# !!! do not modify this section manually !!! # +# - - - - - - - - - - - - - - - - - - - - - - # + +requires-python = ">=3.8" +dependencies = [ + # [AUTOGEN] by pydependence **DO NOT EDIT** [AUTOGEN] + "networkx", + # ← "pydependence._core.modules_resolver" + # ← "pydependence._core.modules_scope" + "stdlib_list", + # ← "pydependence._core.builtin" + # "tomlkit" [L] + # ← [L] "pydependence._core.requirements_writers" + # ← [L] "pydependence._core.utils" +] + +# - - - - - - - - - - - - - - - - - - - - - - # +# [AUTOGEN] PROJECT: OPTIONAL DEPS [AUTOGEN] # +# !!! do not modify this section manually !!! # +# !!! do not modify this section manually !!! # +# !!! do not modify this section manually !!! 
# +# - - - - - - - - - - - - - - - - - - - - - - # + +[project.optional-dependencies] +dev = [ + "pre-commit>=2.19.0", + "black>=22.8.0", + "isort>=5.10.1", + "pyupgrade>=2.37.3", +] + +test = [ + "pytest>=6.2.4", + "pytest-cov>=2.12.1", +] + +example_core_no_lazy = [ + # [AUTOGEN] by pydependence **DO NOT EDIT** [AUTOGEN] + "networkx", + # ← "pydependence._core.modules_resolver" + # ← "pydependence._core.modules_scope" + "packaging", + # ← "pydependence.__main__" + "pydantic>=2.0.0", + # ← "pydependence.__main__" + "stdlib_list", + # ← "pydependence._core.builtin" + # "tomlkit" [L] + # ← [L] "pydependence._core.requirements_writers" + # ← [L] "pydependence._core.utils" +] + +example_legacy = [ + # [AUTOGEN] by pydependence **DO NOT EDIT** [AUTOGEN] + "networkx", + # ← "pydependence._core.modules_resolver" + # ← "pydependence._core.modules_scope" + "packaging", + # ← "pydependence.__main__" + "pydantic<2.0.0,>=1.0.0", + # ← "pydependence.__main__" + "stdlib_list", + # ← "pydependence._core.builtin" + "tomlkit", # [L] + # ← [L] "pydependence._core.requirements_writers" + # ← [L] "pydependence._core.utils" +] diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 35015a6..0000000 --- a/pytest.ini +++ /dev/null @@ -1,10 +0,0 @@ - -[pytest] -minversion = 6.0 -testpaths = - tests - pydependance -python_files = - test.py - test_*.py - __test__*.py diff --git a/requirements-precommit.txt b/requirements-precommit.txt deleted file mode 100644 index d8c339b..0000000 --- a/requirements-precommit.txt +++ /dev/null @@ -1,4 +0,0 @@ -pre-commit>=2.19.0 -black>=22.8.0 -isort>=5.10.1 -pyupgrade>=2.37.3 diff --git a/requirements-test.txt b/requirements-test.txt deleted file mode 100644 index 412de73..0000000 --- a/requirements-test.txt +++ /dev/null @@ -1,2 +0,0 @@ -pytest>=6.2.4 -pytest-cov>=2.12.1 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 932b96f..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -pip>=21.0 -tomlkit>=0.11.0 -pydantic>=1.0.0 diff --git a/setup.py b/setup.py deleted file mode 100644 index e99adf5..0000000 --- a/setup.py +++ /dev/null @@ -1,43 +0,0 @@ -import setuptools - -# ========================================================================= # -# HELPER # -# ========================================================================= # - - -with open("README.md", "r", encoding="utf-8") as file: - long_description = file.read() - -with open("requirements.txt", "r") as f: - install_requires = (req[0] for req in map(lambda x: x.split("#"), f.readlines())) - install_requires = [req for req in map(str.strip, install_requires) if req] - - -# ========================================================================= # -# SETUP # -# ========================================================================= # - - -setuptools.setup( - name="pydependance", - author="Nathan Juraj Michlo", - author_email="NathanJMichlo@gmail.com", - version="0.1.0.dev4", - python_requires=">=3.10", - packages=setuptools.find_packages(), - install_requires=install_requires, - url="https://github.com/nmichlo/pydependance", - description="Python local package dependency discovery and resolution", - long_description=long_description, - long_description_content_type="text/markdown", - classifiers=[ - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3.10", - ], -) - - -# ========================================================================= # -# END # -# 
========================================================================= # diff --git a/tests/test-packages/t_ast_parser.py b/tests/test-packages/t_ast_parser.py new file mode 100644 index 0000000..fbfbd64 --- /dev/null +++ b/tests/test-packages/t_ast_parser.py @@ -0,0 +1,18 @@ +import os +from sys import path + +from foo.bar import asdf + +from .package import sub_module + + +def test_func(): + import json + import sys + + from asdf.fdsa import foo as bar + + +asdf = lazy_import("buzz") +fdsa = lazy_callable("bazz") +foo = lazy_inheritable("bar") diff --git a/tests/test.py b/tests/test.py deleted file mode 100644 index ab7bcc8..0000000 --- a/tests/test.py +++ /dev/null @@ -1,37 +0,0 @@ -# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ -# MIT License -# -# Copyright (c) 2022 Nathan Juraj Michlo -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ - - -# ========================================================================= # -# TESTS # -# ========================================================================= # - - -def test_example(): - print("this test should pass!") - - -# ========================================================================= # -# END # -# ========================================================================= # diff --git a/tests/test_ast_parser.py b/tests/test_ast_parser.py new file mode 100644 index 0000000..8791866 --- /dev/null +++ b/tests/test_ast_parser.py @@ -0,0 +1,153 @@ +# ============================================================================== # +# MIT License # +# # +# Copyright (c) 2024 Nathan Juraj Michlo # +# # +# Permission is hereby granted, free of charge, to any person obtaining a copy # +# of this software and associated documentation files (the "Software"), to deal # +# in the Software without restriction, including without limitation the rights # +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # +# copies of the Software, and to permit persons to whom the Software is # +# furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in all # +# copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. # +# ============================================================================== # + +from pathlib import Path + +from pydependence._core.module_data import ModuleMetadata +from pydependence._core.module_imports_ast import ( + LocImportInfo, + load_imports_from_module_info, +) + +PKG_ROOT = Path(__file__).parent / "test-packages" + +TEST_MODULE_INFO = ModuleMetadata.from_root_and_subpath( + root=PKG_ROOT, + subpath=PKG_ROOT / "t_ast_parser.py", + tag="test", +) + +# ========================================================================= # +# TESTS # +# ========================================================================= # + + +def test_ast_get_module_imports(tmp_path): + # checks + results = load_imports_from_module_info(TEST_MODULE_INFO) + + a = { + "asdf.fdsa": [ + LocImportInfo( + source_module_info=TEST_MODULE_INFO, + source="import_from", + target="asdf.fdsa", + is_lazy=True, + lineno=13, + col_offset=4, + stack_type_names=("Module", "FunctionDef", "ImportFrom"), + is_relative=False, + ) + ], + "buzz": [ + LocImportInfo( + source_module_info=TEST_MODULE_INFO, + source="lazy_plugin", + target="buzz", + is_lazy=True, + lineno=16, + col_offset=7, + stack_type_names=("Module", "Assign", "Call"), + is_relative=False, + ) + ], + "foo.bar": [ + LocImportInfo( + source_module_info=TEST_MODULE_INFO, + source="import_from", + target="foo.bar", + is_lazy=False, + lineno=4, + col_offset=0, + stack_type_names=("Module", "ImportFrom"), + is_relative=False, + ) + ], + "json": [ + LocImportInfo( + source_module_info=TEST_MODULE_INFO, + source="import_", + target="json", + is_lazy=True, + lineno=10, + col_offset=4, + stack_type_names=("Module", "FunctionDef", "Import"), + is_relative=False, + ) + ], + "os": [ + LocImportInfo( + source_module_info=TEST_MODULE_INFO, + source="import_", + target="os", + is_lazy=False, + lineno=1, + col_offset=0, + stack_type_names=("Module", "Import"), + is_relative=False, + ) + ], + "sys": [ + LocImportInfo( + source_module_info=TEST_MODULE_INFO, + source="import_from", + target="sys", + is_lazy=False, + lineno=2, + col_offset=0, + stack_type_names=("Module", "ImportFrom"), + is_relative=False, + ), + LocImportInfo( + source_module_info=TEST_MODULE_INFO, + source="import_", + target="sys", + is_lazy=True, + lineno=11, + col_offset=4, + stack_type_names=("Module", "FunctionDef", "Import"), + is_relative=False, + ), + ], + "package": [ + LocImportInfo( + source_module_info=TEST_MODULE_INFO, + source="import_from", + target="package", + is_lazy=False, + lineno=6, + col_offset=0, + stack_type_names=("Module", "ImportFrom"), + is_relative=True, + ) + ], + } + + assert set(results.keys()) == set(a.keys()) + assert a == results + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/tests/test_type_module.py b/tests/test_type_module.py new file mode 100644 index 0000000..d935282 --- /dev/null +++ b/tests/test_type_module.py @@ -0,0 +1,186 @@ +# ============================================================================== # +# MIT License # +# # +# Copyright (c) 2023 Nathan Juraj Michlo # +# # +# Permission is hereby granted, free of charge, to any person obtaining a 
copy # +# of this software and associated documentation files (the "Software"), to deal # +# in the Software without restriction, including without limitation the rights # +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # +# copies of the Software, and to permit persons to whom the Software is # +# furnished to do so, subject to the following conditions: # +# # +# The above copyright notice and this permission notice shall be included in all # +# copies or substantial portions of the Software. # +# # +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # +# SOFTWARE. # +# ============================================================================== # +import dataclasses +import os +from pathlib import Path + +import pytest + +from pydependence._core._OLD_type_import import Import +from pydependence._core._OLD_type_module import ( + ModuleFile, + ModuleImportStatement, + _package_normalized_module_paths, + _path_is_python_module_file, + _path_is_python_package_dir, + iter_modules_from_package_root, + iter_modules_from_python_path, +) + +# ========================================================================= # +# TESTS # +# ========================================================================= # + + +@dataclasses.dataclass +class TestPaths: + root: Path + root_file: Path + root_pkg: Path + root_mod: Path + root_pkg_init: Path + root_pkg_mod: Path + + +_T_ROOT = "test_directory" +_T_ROOT_FILE = os.path.join(_T_ROOT, "test_file.txt") +_T_ROOT_MOD = os.path.join(_T_ROOT, "test_module.py") +_T_ROOT_PKG = os.path.join(_T_ROOT, "test_package") +_T_ROOT_PKG_INIT = os.path.join(_T_ROOT_PKG, "__init__.py") +_T_ROOT_PKG_MOD = os.path.join(_T_ROOT_PKG, "sub_module.py") + +_I_ROOT_MOD = "os.path" +_C_ROOT_MOD = f"import {_I_ROOT_MOD}" +_I_ROOT_PKG_MOD = "os" +_C_ROOT_PKG_MOD = f"import {_I_ROOT_PKG_MOD}" + + +@pytest.fixture(scope="session", autouse=True) +def test_paths(tmpdir_factory) -> TestPaths: + root = Path(tmpdir_factory.mktemp(_T_ROOT)) + # paths + paths = TestPaths( + root=root.joinpath(_T_ROOT), + root_file=root.joinpath(_T_ROOT_FILE), + root_pkg=root.joinpath(_T_ROOT_PKG), + root_mod=root.joinpath(_T_ROOT_MOD), + root_pkg_init=root.joinpath(_T_ROOT_PKG_INIT), + root_pkg_mod=root.joinpath(_T_ROOT_PKG_MOD), + ) + # make dirs + paths.root.mkdir(parents=True) + paths.root_pkg.mkdir(parents=True) + # fill files + paths.root_file.write_text("") + paths.root_mod.write_text(_C_ROOT_MOD) + paths.root_pkg_init.write_text("") + paths.root_pkg_mod.write_text(_C_ROOT_PKG_MOD) + # get files + return paths + + +def test_module_import(test_paths: TestPaths): + target_import = Import(_I_ROOT_MOD) + source_module = ModuleFile(test_paths.root_mod, _I_ROOT_MOD) + module_import = ModuleImportStatement(target_import, source_module, False) + + assert module_import.target == target_import + assert module_import.source == source_module + + +def test__path_is_python_module(test_paths: TestPaths): + assert _path_is_python_module_file(test_paths.root_mod) == True + assert _path_is_python_module_file(test_paths.root_file) == False + + +def 
test__path_is_python_package(test_paths: TestPaths): + assert _path_is_python_package_dir(test_paths.root_pkg) == True + assert _path_is_python_package_dir(test_paths.root_mod) == False + + +def test__package_normalized_module_path(test_paths: TestPaths): + # make sure files are not packages and that they have .py removed for the import path + assert _package_normalized_module_paths(test_paths.root_mod) == ( + test_paths.root_mod, + False, + test_paths.root_mod.with_name("test_module"), + ) + # make sure __init__.py is remove for the import path + assert _package_normalized_module_paths(test_paths.root_pkg_init) == ( + test_paths.root_pkg_init, + True, + test_paths.root_pkg_init.parent, + ) + # make sure dirs have the __init__.py added + assert _package_normalized_module_paths(test_paths.root_pkg) == ( + test_paths.root_pkg_init, + True, + test_paths.root_pkg_init.parent, + ) + with pytest.raises(ValueError): + _package_normalized_module_paths(test_paths.root_file) + + +def test_module_file(test_paths: TestPaths): + module_file = ModuleFile(test_paths.root_mod, "os.path") + assert module_file.abs_file_path == test_paths.root_mod + assert module_file.is_package == False + assert module_file.module_import.keys == ("os", "path") + assert isinstance(module_file.import_statements, list) + assert isinstance(module_file.import_statements[0], ModuleImportStatement) + + +def test_find_modules(test_paths: TestPaths): + from_pkg_root = lambda root: list(iter_modules_from_package_root(root=root)) + from_py_path = lambda python_path: list( + iter_modules_from_python_path(python_path=python_path) + ) + + # direct package root style searching + with pytest.raises(ValueError): + from_pkg_root(test_paths.root) + with pytest.raises(ValueError): + from_pkg_root(test_paths.root_file) + [r_test_pkg_init, r_test_pkg_mod] = from_pkg_root(test_paths.root_pkg) + [r_test_mod] = from_pkg_root(test_paths.root_mod) + with pytest.raises(ValueError): + from_pkg_root(test_paths.root_pkg_init) + with pytest.raises(ValueError): + from_pkg_root(test_paths.root_pkg_mod) + + # PYTHON_PATH style searching + [p_test_mod, p_test_pkg_init, p_test_pkg_mod] = from_py_path(test_paths.root) + with pytest.raises(NotADirectoryError): + from_py_path(test_paths.root_file) + with pytest.raises(ValueError): + from_py_path(test_paths.root_pkg) + with pytest.raises(NotADirectoryError): + from_py_path(test_paths.root_mod) + with pytest.raises(NotADirectoryError): + from_py_path(test_paths.root_pkg_init) + with pytest.raises(NotADirectoryError): + from_py_path(test_paths.root_pkg_mod) + + assert isinstance(r_test_pkg_init, ModuleFile) + assert isinstance(r_test_pkg_mod, ModuleFile) + assert isinstance(r_test_mod, ModuleFile) + + assert r_test_pkg_init == p_test_pkg_init + assert r_test_pkg_mod == p_test_pkg_mod + assert r_test_mod == p_test_mod + + +# ========================================================================= # +# END # +# ========================================================================= #
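For reference, a minimal usage sketch of the requirement mapping introduced above (an editorial example, not part of the patch; the pillow/numpy/scipy names are illustrative only):

    from pydependence._core.requirements_map import (
        DEFAULT_REQUIREMENTS_ENV,
        ImportMatcherGlob,
        RequirementsMapper,
    )

    # "PIL.*" matches "PIL" and any of its submodules; "numpy" matches exactly.
    mapper = RequirementsMapper(
        env_matchers={
            DEFAULT_REQUIREMENTS_ENV: [
                ("pillow", ImportMatcherGlob("PIL.*")),
                ("numpy", ImportMatcherGlob("numpy")),
            ],
        },
        strict=False,
    )

    env = DEFAULT_REQUIREMENTS_ENV
    assert mapper.map_import_to_requirement("PIL.Image", requirements_env=env) == "pillow"
    assert mapper.map_import_to_requirement("numpy", requirements_env=env) == "numpy"
    # unmapped imports fall back to their root (with a warning) since strict=False
    assert mapper.map_import_to_requirement("scipy.stats", requirements_env=env) == "scipy"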