diff --git a/news/12063.removal.rst b/news/12063.removal.rst new file mode 100644 index 00000000000..03d785d69c5 --- /dev/null +++ b/news/12063.removal.rst @@ -0,0 +1,4 @@ +Remove support for legacy versions and dependency specifiers. Packages with non +standard-compliant versions or dependency specifiers are now ignored by the resolver. +Already installed packages with non standard-compliant versions or dependency specifiers +must be uninstalled before upgrading them. diff --git a/news/packaging.vendor.rst b/news/packaging.vendor.rst new file mode 100644 index 00000000000..08f099e1cb3 --- /dev/null +++ b/news/packaging.vendor.rst @@ -0,0 +1 @@ +Upgrade packaging to 24.0 diff --git a/news/pyparsing.vendor.rst b/news/pyparsing.vendor.rst index d6aab47079f..37a586a5651 100644 --- a/news/pyparsing.vendor.rst +++ b/news/pyparsing.vendor.rst @@ -1 +1 @@ -Upgrade pyparsing to 3.1.2 +Remove pyparsing diff --git a/src/pip/_internal/commands/check.py b/src/pip/_internal/commands/check.py index 5efd0a34160..584df9f55c5 100644 --- a/src/pip/_internal/commands/check.py +++ b/src/pip/_internal/commands/check.py @@ -7,7 +7,6 @@ from pip._internal.operations.check import ( check_package_set, create_package_set_from_installed, - warn_legacy_versions_and_specifiers, ) from pip._internal.utils.misc import write_output @@ -22,7 +21,6 @@ class CheckCommand(Command): def run(self, options: Values, args: List[str]) -> int: package_set, parsing_probs = create_package_set_from_installed() - warn_legacy_versions_and_specifiers(package_set) missing, conflicting = check_package_set(package_set) for project_name in missing: diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py index 54247a78a65..917bbb91d83 100644 --- a/src/pip/_internal/commands/download.py +++ b/src/pip/_internal/commands/download.py @@ -139,7 +139,6 @@ def run(self, options: Values, args: List[str]) -> int: downloaded.append(req.name) preparer.prepare_linked_requirements_more(requirement_set.requirements.values()) - requirement_set.warn_legacy_versions_and_specifiers() if downloaded: write_output("Successfully downloaded %s", " ".join(downloaded)) diff --git a/src/pip/_internal/commands/index.py b/src/pip/_internal/commands/index.py index f55e9e49974..2e2661bba71 100644 --- a/src/pip/_internal/commands/index.py +++ b/src/pip/_internal/commands/index.py @@ -1,8 +1,8 @@ import logging from optparse import Values -from typing import Any, Iterable, List, Optional, Union +from typing import Any, Iterable, List, Optional -from pip._vendor.packaging.version import LegacyVersion, Version +from pip._vendor.packaging.version import Version from pip._internal.cli import cmdoptions from pip._internal.cli.req_command import IndexGroupCommand @@ -115,7 +115,7 @@ def get_available_package_versions(self, options: Values, args: List[Any]) -> No ignore_requires_python=options.ignore_requires_python, ) - versions: Iterable[Union[LegacyVersion, Version]] = ( + versions: Iterable[Version] = ( candidate.version for candidate in finder.find_all_candidates(query) ) diff --git a/src/pip/_internal/commands/install.py b/src/pip/_internal/commands/install.py index 6cf7571e4a6..a9e83f9d8cd 100644 --- a/src/pip/_internal/commands/install.py +++ b/src/pip/_internal/commands/install.py @@ -387,9 +387,6 @@ def run(self, options: Values, args: List[str]) -> int: json.dump(report.to_dict(), f, indent=2, ensure_ascii=False) if options.dry_run: - # In non dry-run mode, the legacy versions and specifiers check - # will be done as part of conflict 
detection. - requirement_set.warn_legacy_versions_and_specifiers() would_install_items = sorted( (r.metadata["name"], r.metadata["version"]) for r in requirement_set.requirements_to_install diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index f510919910e..cb1e313f468 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Generator, List, Optional, Sequence, Tuple, cast from pip._vendor.packaging.utils import canonicalize_name +from pip._vendor.packaging.version import Version from pip._internal.cli import cmdoptions from pip._internal.cli.req_command import IndexGroupCommand @@ -18,7 +19,6 @@ from pip._internal.utils.misc import tabulate, write_output if TYPE_CHECKING: - from pip._internal.metadata.base import DistributionVersion class _DistWithLatestInfo(BaseDistribution): """Give the distribution object a couple of extra fields. @@ -27,7 +27,7 @@ class _DistWithLatestInfo(BaseDistribution): makes the rest of the code much cleaner. """ - latest_version: DistributionVersion + latest_version: Version latest_filetype: str _ProcessedDists = Sequence[_DistWithLatestInfo] @@ -333,7 +333,7 @@ def format_for_columns( for proj in pkgs: # if we're working on the 'outdated' list, separate out the # latest_version and type - row = [proj.raw_name, str(proj.version)] + row = [proj.raw_name, proj.raw_version] if running_outdated: row.append(str(proj.latest_version)) diff --git a/src/pip/_internal/commands/show.py b/src/pip/_internal/commands/show.py index b7894ce1f9c..c54d548f5fb 100644 --- a/src/pip/_internal/commands/show.py +++ b/src/pip/_internal/commands/show.py @@ -2,6 +2,7 @@ from optparse import Values from typing import Generator, Iterable, Iterator, List, NamedTuple, Optional +from pip._vendor.packaging.requirements import InvalidRequirement from pip._vendor.packaging.utils import canonicalize_name from pip._internal.cli.base_command import Command @@ -100,12 +101,19 @@ def _get_requiring_packages(current_dist: BaseDistribution) -> Iterator[str]: except KeyError: continue - requires = sorted( - # Avoid duplicates in requirements (e.g. due to environment markers). - {req.name for req in dist.iter_dependencies()}, - key=str.lower, - ) - required_by = sorted(_get_requiring_packages(dist), key=str.lower) + try: + requires = sorted( + # Avoid duplicates in requirements (e.g. due to environment markers). 
+ {req.name for req in dist.iter_dependencies()}, + key=str.lower, + ) + except InvalidRequirement: + requires = sorted(dist.iter_raw_dependencies(), key=str.lower) + + try: + required_by = sorted(_get_requiring_packages(dist), key=str.lower) + except InvalidRequirement: + required_by = ["#N/A"] try: entry_points_text = dist.read_text("entry_points.txt") @@ -139,7 +147,7 @@ def _get_requiring_packages(current_dist: BaseDistribution) -> Iterator[str]: yield _PackageInfo( name=dist.raw_name, - version=str(dist.version), + version=dist.raw_version, location=dist.location or "", editable_project_location=dist.editable_project_location, requires=requires, diff --git a/src/pip/_internal/commands/wheel.py b/src/pip/_internal/commands/wheel.py index ed578aa2500..278719f4e0c 100644 --- a/src/pip/_internal/commands/wheel.py +++ b/src/pip/_internal/commands/wheel.py @@ -154,7 +154,6 @@ def run(self, options: Values, args: List[str]) -> int: reqs_to_build.append(req) preparer.prepare_linked_requirements_more(requirement_set.requirements.values()) - requirement_set.warn_legacy_versions_and_specifiers() # build wheels build_successes, build_failures = build( diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 0609e450683..0542d12cc01 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -358,6 +358,17 @@ def __str__(self) -> str: ) +class MetadataInvalid(InstallationError): + """Metadata is invalid.""" + + def __init__(self, ireq: "InstallRequirement", error: str) -> None: + self.ireq = ireq + self.error = error + + def __str__(self) -> str: + return f"Requested {self.ireq} has invalid metadata: {self.error}" + + class InstallationSubprocessError(DiagnosticPipError, InstallationError): """A subprocess call failed.""" diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index 98ee6315567..fb270f22f83 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -11,7 +11,7 @@ from pip._vendor.packaging import specifiers from pip._vendor.packaging.tags import Tag from pip._vendor.packaging.utils import canonicalize_name -from pip._vendor.packaging.version import _BaseVersion +from pip._vendor.packaging.version import InvalidVersion, _BaseVersion from pip._vendor.packaging.version import parse as parse_version from pip._internal.exceptions import ( @@ -752,11 +752,14 @@ def get_install_candidate( self._log_skipped_link(link, result, detail) return None - return InstallationCandidate( - name=link_evaluator.project_name, - link=link, - version=detail, - ) + try: + return InstallationCandidate( + name=link_evaluator.project_name, + link=link, + version=detail, + ) + except InvalidVersion: + return None def evaluate_links( self, link_evaluator: LinkEvaluator, links: Iterable[Link] diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index af0412c819c..9eabcdb278b 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -25,7 +25,7 @@ from pip._vendor.packaging.requirements import Requirement from pip._vendor.packaging.specifiers import InvalidSpecifier, SpecifierSet from pip._vendor.packaging.utils import NormalizedName, canonicalize_name -from pip._vendor.packaging.version import LegacyVersion, Version +from pip._vendor.packaging.version import Version from pip._internal.exceptions import NoneMetadataError from pip._internal.locations import site_packages, user_site @@ -41,8 +41,6 @@ from 
._json import msg_to_json -DistributionVersion = Union[LegacyVersion, Version] - InfoPath = Union[str, pathlib.PurePath] logger = logging.getLogger(__name__) @@ -140,10 +138,10 @@ def from_wheel(cls, wheel: "Wheel", name: str) -> "BaseDistribution": raise NotImplementedError() def __repr__(self) -> str: - return f"{self.raw_name} {self.version} ({self.location})" + return f"{self.raw_name} {self.raw_version} ({self.location})" def __str__(self) -> str: - return f"{self.raw_name} {self.version}" + return f"{self.raw_name} {self.raw_version}" @property def location(self) -> Optional[str]: @@ -274,7 +272,11 @@ def canonical_name(self) -> NormalizedName: raise NotImplementedError() @property - def version(self) -> DistributionVersion: + def version(self) -> Version: + raise NotImplementedError() + + @property + def raw_version(self) -> str: raise NotImplementedError() @property @@ -443,24 +445,19 @@ def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requiremen """ raise NotImplementedError() - def iter_provided_extras(self) -> Iterable[str]: + def iter_raw_dependencies(self) -> Iterable[str]: + """Raw Requires-Dist metadata.""" + return self.metadata.get_all("Requires-Dist", []) + + def iter_provided_extras(self) -> Iterable[NormalizedName]: """Extras provided by this distribution. For modern .dist-info distributions, this is the collection of "Provides-Extra:" entries in distribution metadata. - The return value of this function is not particularly useful other than - display purposes due to backward compatibility issues and the extra - names being poorly normalized prior to PEP 685. If you want to perform - logic operations on extras, use :func:`is_extra_provided` instead. - """ - raise NotImplementedError() - - def is_extra_provided(self, extra: str) -> bool: - """Check whether an extra is provided by this distribution. - - This is needed mostly for compatibility issues with pkg_resources not - following the extra normalization rules defined in PEP 685. + The return value of this function is expected to be normalised names, + per PEP 685, with the returned value being handled appropriately by + `iter_dependencies`. """ raise NotImplementedError() diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py index 8591029f16e..f65ccb1e706 100644 --- a/src/pip/_internal/metadata/importlib/_dists.py +++ b/src/pip/_internal/metadata/importlib/_dists.py @@ -16,13 +16,13 @@ from pip._vendor.packaging.requirements import Requirement from pip._vendor.packaging.utils import NormalizedName, canonicalize_name +from pip._vendor.packaging.version import Version from pip._vendor.packaging.version import parse as parse_version from pip._internal.exceptions import InvalidWheel, UnsupportedWheel from pip._internal.metadata.base import ( BaseDistribution, BaseEntryPoint, - DistributionVersion, InfoPath, Wheel, ) @@ -171,9 +171,13 @@ def canonical_name(self) -> NormalizedName: return canonicalize_name(name) @property - def version(self) -> DistributionVersion: + def version(self) -> Version: return parse_version(self._dist.version) + @property + def raw_version(self) -> str: + return self._dist.version + def is_file(self, path: InfoPath) -> bool: return self._dist.read_text(str(path)) is not None @@ -204,19 +208,18 @@ def _metadata_impl(self) -> email.message.Message: # until upstream can improve the protocol. 
(python/cpython#94952) return cast(email.message.Message, self._dist.metadata) - def iter_provided_extras(self) -> Iterable[str]: - return self.metadata.get_all("Provides-Extra", []) - - def is_extra_provided(self, extra: str) -> bool: - return any( - canonicalize_name(provided_extra) == canonicalize_name(extra) - for provided_extra in self.metadata.get_all("Provides-Extra", []) - ) + def iter_provided_extras(self) -> Iterable[NormalizedName]: + return [ + canonicalize_name(extra) + for extra in self.metadata.get_all("Provides-Extra", []) + ] def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requirement]: contexts: Sequence[Dict[str, str]] = [{"extra": e} for e in extras] for req_string in self.metadata.get_all("Requires-Dist", []): - req = Requirement(req_string) + # strip() because email.message.Message.get_all() may return a leading \n + # in case a long header was wrapped. + req = Requirement(req_string.strip()) if not req.marker: yield req elif not extras and req.marker.evaluate({"extra": ""}): diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py index bb11e5bd8a5..235556781c6 100644 --- a/src/pip/_internal/metadata/pkg_resources.py +++ b/src/pip/_internal/metadata/pkg_resources.py @@ -3,11 +3,21 @@ import logging import os import zipfile -from typing import Collection, Iterable, Iterator, List, Mapping, NamedTuple, Optional +from typing import ( + Collection, + Iterable, + Iterator, + List, + Mapping, + NamedTuple, + Optional, + cast, +) from pip._vendor import pkg_resources from pip._vendor.packaging.requirements import Requirement from pip._vendor.packaging.utils import NormalizedName, canonicalize_name +from pip._vendor.packaging.version import Version from pip._vendor.packaging.version import parse as parse_version from pip._internal.exceptions import InvalidWheel, NoneMetadataError, UnsupportedWheel @@ -19,7 +29,6 @@ BaseDistribution, BaseEntryPoint, BaseEnvironment, - DistributionVersion, InfoPath, Wheel, ) @@ -75,6 +84,19 @@ def run_script(self, script_name: str, namespace: str) -> None: class Distribution(BaseDistribution): def __init__(self, dist: pkg_resources.Distribution) -> None: self._dist = dist + # This is populated lazily, to avoid loading metadata for all possible + # distributions eagerly. + self.__extra_mapping: Optional[Mapping[NormalizedName, str]] = None + + @property + def _extra_mapping(self) -> Mapping[NormalizedName, str]: + if self.__extra_mapping is None: + self.__extra_mapping = { + canonicalize_name(extra): pkg_resources.safe_extra(cast(str, extra)) + for extra in self.metadata.get_all("Provides-Extra", []) + } + + return self.__extra_mapping @classmethod def from_directory(cls, directory: str) -> BaseDistribution: @@ -168,9 +190,13 @@ def canonical_name(self) -> NormalizedName: return canonicalize_name(self._dist.project_name) @property - def version(self) -> DistributionVersion: + def version(self) -> Version: return parse_version(self._dist.version) + @property + def raw_version(self) -> str: + return self._dist.version + def is_file(self, path: InfoPath) -> bool: return self._dist.has_metadata(str(path)) @@ -215,16 +241,15 @@ def _metadata_impl(self) -> email.message.Message: return feed_parser.close() def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requirement]: - if extras: # pkg_resources raises on invalid extras, so we sanitize. 
- extras = frozenset(pkg_resources.safe_extra(e) for e in extras) - extras = extras.intersection(self._dist.extras) + if extras: + relevant_extras = set(self._extra_mapping) & set( + map(canonicalize_name, extras) + ) + extras = [self._extra_mapping[extra] for extra in relevant_extras] return self._dist.requires(extras) - def iter_provided_extras(self) -> Iterable[str]: - return self._dist.extras - - def is_extra_provided(self, extra: str) -> bool: - return pkg_resources.safe_extra(extra) in self._dist.extras + def iter_provided_extras(self) -> Iterable[NormalizedName]: + return self._extra_mapping.keys() class Environment(BaseEnvironment): diff --git a/src/pip/_internal/operations/check.py b/src/pip/_internal/operations/check.py index 90c6a58a55e..623db76e229 100644 --- a/src/pip/_internal/operations/check.py +++ b/src/pip/_internal/operations/check.py @@ -5,28 +5,25 @@ from typing import Callable, Dict, List, NamedTuple, Optional, Set, Tuple from pip._vendor.packaging.requirements import Requirement -from pip._vendor.packaging.specifiers import LegacySpecifier from pip._vendor.packaging.utils import NormalizedName, canonicalize_name -from pip._vendor.packaging.version import LegacyVersion +from pip._vendor.packaging.version import Version from pip._internal.distributions import make_distribution_for_install_requirement from pip._internal.metadata import get_default_environment -from pip._internal.metadata.base import DistributionVersion from pip._internal.req.req_install import InstallRequirement -from pip._internal.utils.deprecation import deprecated logger = logging.getLogger(__name__) class PackageDetails(NamedTuple): - version: DistributionVersion + version: Version dependencies: List[Requirement] # Shorthands PackageSet = Dict[NormalizedName, PackageDetails] Missing = Tuple[NormalizedName, Requirement] -Conflicting = Tuple[NormalizedName, DistributionVersion, Requirement] +Conflicting = Tuple[NormalizedName, Version, Requirement] MissingDict = Dict[NormalizedName, List[Missing]] ConflictingDict = Dict[NormalizedName, List[Conflicting]] @@ -46,7 +43,7 @@ def create_package_set_from_installed() -> Tuple[PackageSet, bool]: package_set[name] = PackageDetails(dist.version, dependencies) except (OSError, ValueError) as e: # Don't crash on unreadable or broken metadata. - logger.warning("Error parsing requirements for %s: %s", name, e) + logger.warning("Error parsing dependencies of %s: %s", name, e) problems = True return package_set, problems @@ -60,8 +57,6 @@ def check_package_set( package name and returns a boolean. """ - warn_legacy_versions_and_specifiers(package_set) - missing = {} conflicting = {} @@ -152,36 +147,3 @@ def _create_whitelist( break return packages_affected - - -def warn_legacy_versions_and_specifiers(package_set: PackageSet) -> None: - for project_name, package_details in package_set.items(): - if isinstance(package_details.version, LegacyVersion): - deprecated( - reason=( - f"{project_name} {package_details.version} " - f"has a non-standard version number." - ), - replacement=( - f"to upgrade to a newer version of {project_name} " - f"or contact the author to suggest that they " - f"release a version with a conforming version number" - ), - issue=12063, - gone_in="24.1", - ) - for dep in package_details.dependencies: - if any(isinstance(spec, LegacySpecifier) for spec in dep.specifier): - deprecated( - reason=( - f"{project_name} {package_details.version} " - f"has a non-standard dependency specifier {dep}." 
- ), - replacement=( - f"to upgrade to a newer version of {project_name} " - f"or contact the author to suggest that they " - f"release a version with a conforming dependency specifiers" - ), - issue=12063, - gone_in="24.1", - ) diff --git a/src/pip/_internal/operations/freeze.py b/src/pip/_internal/operations/freeze.py index 35445684514..bb1039fb776 100644 --- a/src/pip/_internal/operations/freeze.py +++ b/src/pip/_internal/operations/freeze.py @@ -4,7 +4,7 @@ from typing import Container, Dict, Generator, Iterable, List, NamedTuple, Optional, Set from pip._vendor.packaging.utils import canonicalize_name -from pip._vendor.packaging.version import Version +from pip._vendor.packaging.version import InvalidVersion from pip._internal.exceptions import BadCommand, InstallationError from pip._internal.metadata import BaseDistribution, get_environment @@ -145,10 +145,13 @@ def freeze( def _format_as_name_version(dist: BaseDistribution) -> str: - dist_version = dist.version - if isinstance(dist_version, Version): + try: + dist_version = dist.version + except InvalidVersion: + # legacy version + return f"{dist.raw_name}==={dist.raw_version}" + else: return f"{dist.raw_name}=={dist_version}" - return f"{dist.raw_name}==={dist_version}" def _get_editable_info(dist: BaseDistribution) -> _EditableInfo: diff --git a/src/pip/_internal/req/constructors.py b/src/pip/_internal/req/constructors.py index 36f517e599d..b2c9505a9a4 100644 --- a/src/pip/_internal/req/constructors.py +++ b/src/pip/_internal/req/constructors.py @@ -359,7 +359,7 @@ def with_source(text: str) -> str: def _parse_req_string(req_as_string: str) -> Requirement: try: - req = get_requirement(req_as_string) + return get_requirement(req_as_string) except InvalidRequirement: if os.path.sep in req_as_string: add_msg = "It looks like a path." @@ -374,17 +374,6 @@ def _parse_req_string(req_as_string: str) -> Requirement: if add_msg: msg += f"\nHint: {add_msg}" raise InstallationError(msg) - else: - # Deprecate extras after specifiers: "name>=1.0[extras]" - # This currently works by accident because _strip_extras() parses - # any extras in the end of the string and those are saved in - # RequirementParts - for spec in req.specifier: - spec_str = str(spec) - if spec_str.endswith("]"): - msg = f"Extras after version '{spec_str}'." - raise InstallationError(msg) - return req if req_as_string is not None: req: Optional[Requirement] = _parse_req_string(req_as_string) diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index 7d527959e81..213278588d2 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -52,7 +52,6 @@ redact_auth_from_requirement, redact_auth_from_url, ) -from pip._internal.utils.packaging import safe_extra from pip._internal.utils.subprocess import runner_with_spinner_message from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds from pip._internal.utils.unpacking import unpack_file @@ -284,12 +283,7 @@ def match_markers(self, extras_requested: Optional[Iterable[str]] = None) -> boo extras_requested = ("",) if self.markers is not None: return any( - self.markers.evaluate({"extra": extra}) - # TODO: Remove these two variants when packaging is upgraded to - # support the marker comparison logic specified in PEP 685. 
- or self.markers.evaluate({"extra": safe_extra(extra)}) - or self.markers.evaluate({"extra": canonicalize_name(extra)}) - for extra in extras_requested + self.markers.evaluate({"extra": extra}) for extra in extras_requested ) else: return True diff --git a/src/pip/_internal/req/req_set.py b/src/pip/_internal/req/req_set.py index bf36114e802..ec7a6e07a25 100644 --- a/src/pip/_internal/req/req_set.py +++ b/src/pip/_internal/req/req_set.py @@ -2,12 +2,9 @@ from collections import OrderedDict from typing import Dict, List -from pip._vendor.packaging.specifiers import LegacySpecifier from pip._vendor.packaging.utils import canonicalize_name -from pip._vendor.packaging.version import LegacyVersion from pip._internal.req.req_install import InstallRequirement -from pip._internal.utils.deprecation import deprecated logger = logging.getLogger(__name__) @@ -83,37 +80,3 @@ def requirements_to_install(self) -> List[InstallRequirement]: for install_req in self.all_requirements if not install_req.constraint and not install_req.satisfied_by ] - - def warn_legacy_versions_and_specifiers(self) -> None: - for req in self.requirements_to_install: - version = req.get_dist().version - if isinstance(version, LegacyVersion): - deprecated( - reason=( - f"pip has selected the non standard version {version} " - f"of {req}. In the future this version will be " - f"ignored as it isn't standard compliant." - ), - replacement=( - "set or update constraints to select another version " - "or contact the package author to fix the version number" - ), - issue=12063, - gone_in="24.1", - ) - for dep in req.get_dist().iter_dependencies(): - if any(isinstance(spec, LegacySpecifier) for spec in dep.specifier): - deprecated( - reason=( - f"pip has selected {req} {version} which has non " - f"standard dependency specifier {dep}. " - f"In the future this version of {req} will be " - f"ignored as it isn't standard compliant." 
- ), - replacement=( - "set or update constraints to select another version " - "or contact the package author to fix the version number" - ), - issue=12063, - gone_in="24.1", - ) diff --git a/src/pip/_internal/req/req_uninstall.py b/src/pip/_internal/req/req_uninstall.py index 3a9ae2b1097..51ee01171ae 100644 --- a/src/pip/_internal/req/req_uninstall.py +++ b/src/pip/_internal/req/req_uninstall.py @@ -367,7 +367,7 @@ def remove(self, auto_confirm: bool = False, verbose: bool = False) -> None: ) return - dist_name_version = f"{self._dist.raw_name}-{self._dist.version}" + dist_name_version = f"{self._dist.raw_name}-{self._dist.raw_version}" logger.info("Uninstalling %s:", dist_name_version) with indent_log(): diff --git a/src/pip/_internal/resolution/resolvelib/base.py b/src/pip/_internal/resolution/resolvelib/base.py index 4269c7fbecc..0f31dc9b307 100644 --- a/src/pip/_internal/resolution/resolvelib/base.py +++ b/src/pip/_internal/resolution/resolvelib/base.py @@ -1,16 +1,15 @@ from dataclasses import dataclass -from typing import FrozenSet, Iterable, Optional, Tuple, Union +from typing import FrozenSet, Iterable, Optional, Tuple from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.utils import NormalizedName -from pip._vendor.packaging.version import LegacyVersion, Version +from pip._vendor.packaging.version import Version from pip._internal.models.link import Link, links_equivalent from pip._internal.req.req_install import InstallRequirement from pip._internal.utils.hashes import Hashes CandidateLookup = Tuple[Optional["Candidate"], Optional[InstallRequirement]] -CandidateVersion = Union[LegacyVersion, Version] def format_name(project: NormalizedName, extras: FrozenSet[NormalizedName]) -> str: @@ -115,7 +114,7 @@ def name(self) -> str: raise NotImplementedError("Override in subclass") @property - def version(self) -> CandidateVersion: + def version(self) -> Version: raise NotImplementedError("Override in subclass") @property diff --git a/src/pip/_internal/resolution/resolvelib/candidates.py b/src/pip/_internal/resolution/resolvelib/candidates.py index 019ff60a0a5..d30d477be68 100644 --- a/src/pip/_internal/resolution/resolvelib/candidates.py +++ b/src/pip/_internal/resolution/resolvelib/candidates.py @@ -2,6 +2,7 @@ import sys from typing import TYPE_CHECKING, Any, FrozenSet, Iterable, Optional, Tuple, Union, cast +from pip._vendor.packaging.requirements import InvalidRequirement from pip._vendor.packaging.utils import NormalizedName, canonicalize_name from pip._vendor.packaging.version import Version @@ -9,6 +10,7 @@ HashError, InstallationSubprocessError, MetadataInconsistent, + MetadataInvalid, ) from pip._internal.metadata import BaseDistribution from pip._internal.models.link import Link, links_equivalent @@ -21,7 +23,7 @@ from pip._internal.utils.direct_url_helpers import direct_url_from_link from pip._internal.utils.misc import normalize_version_info -from .base import Candidate, CandidateVersion, Requirement, format_name +from .base import Candidate, Requirement, format_name if TYPE_CHECKING: from .factory import Factory @@ -145,7 +147,7 @@ def __init__( ireq: InstallRequirement, factory: "Factory", name: Optional[NormalizedName] = None, - version: Optional[CandidateVersion] = None, + version: Optional[Version] = None, ) -> None: self._link = link self._source_link = source_link @@ -190,7 +192,7 @@ def name(self) -> str: return self.project_name @property - def version(self) -> CandidateVersion: + def version(self) -> Version: if self._version is 
None: self._version = self.dist.version return self._version @@ -220,6 +222,13 @@ def _check_metadata_consistency(self, dist: BaseDistribution) -> None: str(self._version), str(dist.version), ) + # check dependencies are valid + # TODO performance: this means we iterate the dependencies at least twice, + # we may want to cache parsed Requires-Dist + try: + list(dist.iter_dependencies(list(dist.iter_provided_extras()))) + except InvalidRequirement as e: + raise MetadataInvalid(self._ireq, str(e)) def _prepare(self) -> BaseDistribution: try: @@ -257,7 +266,7 @@ def __init__( template: InstallRequirement, factory: "Factory", name: Optional[NormalizedName] = None, - version: Optional[CandidateVersion] = None, + version: Optional[Version] = None, ) -> None: source_link = link cache_entry = factory.get_wheel_cache_entry(source_link, name) @@ -314,7 +323,7 @@ def __init__( template: InstallRequirement, factory: "Factory", name: Optional[NormalizedName] = None, - version: Optional[CandidateVersion] = None, + version: Optional[Version] = None, ) -> None: super().__init__( link=link, @@ -374,7 +383,7 @@ def name(self) -> str: return self.project_name @property - def version(self) -> CandidateVersion: + def version(self) -> Version: if self._version is None: self._version = self.dist.version return self._version @@ -438,14 +447,6 @@ def __init__( """ self.base = base self.extras = frozenset(canonicalize_name(e) for e in extras) - # If any extras are requested in their non-normalized forms, keep track - # of their raw values. This is needed when we look up dependencies - # since PEP 685 has not been implemented for marker-matching, and using - # the non-normalized extra for lookup ensures the user can select a - # non-normalized extra in a package with its non-normalized form. - # TODO: Remove this attribute when packaging is upgraded to support the - # marker comparison logic specified in PEP 685. - self._unnormalized_extras = extras.difference(self.extras) self._comes_from = comes_from if comes_from is not None else self.base._ireq def __str__(self) -> str: @@ -473,7 +474,7 @@ def name(self) -> str: return format_name(self.base.project_name, self.extras) @property - def version(self) -> CandidateVersion: + def version(self) -> Version: return self.base.version def format_for_error(self) -> str: @@ -493,50 +494,6 @@ def is_editable(self) -> bool: def source_link(self) -> Optional[Link]: return self.base.source_link - def _warn_invalid_extras( - self, - requested: FrozenSet[str], - valid: FrozenSet[str], - ) -> None: - """Emit warnings for invalid extras being requested. - - This emits a warning for each requested extra that is not in the - candidate's ``Provides-Extra`` list. - """ - invalid_extras_to_warn = frozenset( - extra - for extra in requested - if extra not in valid - # If an extra is requested in an unnormalized form, skip warning - # about the normalized form being missing. - and extra in self.extras - ) - if not invalid_extras_to_warn: - return - for extra in sorted(invalid_extras_to_warn): - logger.warning( - "%s %s does not provide the extra '%s'", - self.base.name, - self.version, - extra, - ) - - def _calculate_valid_requested_extras(self) -> FrozenSet[str]: - """Get a list of valid extras requested by this candidate. - - The user (or upstream dependent) may have specified extras that the - candidate doesn't support. Any unsupported extras are dropped, and each - cause a warning to be logged here. 
- """ - requested_extras = self.extras.union(self._unnormalized_extras) - valid_extras = frozenset( - extra - for extra in requested_extras - if self.base.dist.is_extra_provided(extra) - ) - self._warn_invalid_extras(requested_extras, valid_extras) - return valid_extras - def iter_dependencies(self, with_requires: bool) -> Iterable[Optional[Requirement]]: factory = self.base._factory @@ -546,7 +503,18 @@ def iter_dependencies(self, with_requires: bool) -> Iterable[Optional[Requiremen if not with_requires: return - valid_extras = self._calculate_valid_requested_extras() + # The user may have specified extras that the candidate doesn't + # support. We ignore any unsupported extras here. + valid_extras = self.extras.intersection(self.base.dist.iter_provided_extras()) + invalid_extras = self.extras.difference(self.base.dist.iter_provided_extras()) + for extra in sorted(invalid_extras): + logger.warning( + "%s %s does not provide the extra '%s'", + self.base.name, + self.version, + extra, + ) + for r in self.base.dist.iter_dependencies(valid_extras): yield from factory.make_requirements_from_spec( str(r), @@ -588,7 +556,7 @@ def name(self) -> str: return REQUIRES_PYTHON_IDENTIFIER @property - def version(self) -> CandidateVersion: + def version(self) -> Version: return self._version def format_for_error(self) -> str: diff --git a/src/pip/_internal/resolution/resolvelib/factory.py b/src/pip/_internal/resolution/resolvelib/factory.py index e36df15d459..1f31d834b04 100644 --- a/src/pip/_internal/resolution/resolvelib/factory.py +++ b/src/pip/_internal/resolution/resolvelib/factory.py @@ -23,6 +23,7 @@ from pip._vendor.packaging.requirements import InvalidRequirement from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.utils import NormalizedName, canonicalize_name +from pip._vendor.packaging.version import Version from pip._vendor.resolvelib import ResolutionImpossible from pip._internal.cache import CacheEntry, WheelCache @@ -30,6 +31,7 @@ DistributionNotFound, InstallationError, MetadataInconsistent, + MetadataInvalid, UnsupportedPythonVersion, UnsupportedWheel, ) @@ -52,7 +54,7 @@ from pip._internal.utils.packaging import get_requirement from pip._internal.utils.virtualenv import running_under_virtualenv -from .base import Candidate, CandidateVersion, Constraint, Requirement +from .base import Candidate, Constraint, Requirement from .candidates import ( AlreadyInstalledCandidate, BaseCandidate, @@ -178,7 +180,7 @@ def _make_candidate_from_link( extras: FrozenSet[str], template: InstallRequirement, name: Optional[NormalizedName], - version: Optional[CandidateVersion], + version: Optional[Version], ) -> Optional[Candidate]: base: Optional[BaseCandidate] = self._make_base_candidate_from_link( link, template, name, version @@ -192,7 +194,7 @@ def _make_base_candidate_from_link( link: Link, template: InstallRequirement, name: Optional[NormalizedName], - version: Optional[CandidateVersion], + version: Optional[Version], ) -> Optional[BaseCandidate]: # TODO: Check already installed candidate, and use it if the link and # editable flag match. 
@@ -212,7 +214,7 @@ def _make_base_candidate_from_link( name=name, version=version, ) - except MetadataInconsistent as e: + except (MetadataInconsistent, MetadataInvalid) as e: logger.info( "Discarding [blue underline]%s[/]: [yellow]%s[reset]", link, @@ -670,8 +672,8 @@ def _report_single_requirement_conflict( cands = self._finder.find_all_candidates(req.project_name) skipped_by_requires_python = self._finder.requires_python_skipped_reasons() - versions_set: Set[CandidateVersion] = set() - yanked_versions_set: Set[CandidateVersion] = set() + versions_set: Set[Version] = set() + yanked_versions_set: Set[Version] = set() for c in cands: is_yanked = c.link.is_yanked if c.link else False if is_yanked: diff --git a/src/pip/_internal/resolution/resolvelib/found_candidates.py b/src/pip/_internal/resolution/resolvelib/found_candidates.py index 8663097b447..a1d57e0f4b2 100644 --- a/src/pip/_internal/resolution/resolvelib/found_candidates.py +++ b/src/pip/_internal/resolution/resolvelib/found_candidates.py @@ -9,13 +9,18 @@ """ import functools +import logging from collections.abc import Sequence from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, Set, Tuple from pip._vendor.packaging.version import _BaseVersion +from pip._internal.exceptions import MetadataInvalid + from .base import Candidate +logger = logging.getLogger(__name__) + IndexCandidateInfo = Tuple[_BaseVersion, Callable[[], Optional[Candidate]]] if TYPE_CHECKING: @@ -44,11 +49,25 @@ def _iter_built(infos: Iterator[IndexCandidateInfo]) -> Iterator[Candidate]: for version, func in infos: if version in versions_found: continue - candidate = func() - if candidate is None: - continue - yield candidate - versions_found.add(version) + try: + candidate = func() + except MetadataInvalid as e: + logger.warning( + "Ignoring version %s of %s since it has invalid metadata:\n" + "%s\n" + "Please use pip<24.1 if you need to use this version.", + version, + e.ireq.name, + e, + ) + # Mark version as found to avoid trying other candidates with the same + # version, since they most likely have invalid metadata as well. 
+ versions_found.add(version) + else: + if candidate is None: + continue + yield candidate + versions_found.add(version) def _iter_built_with_prepended( diff --git a/src/pip/_internal/self_outdated_check.py b/src/pip/_internal/self_outdated_check.py index 0f64ae0e614..2185f2fb108 100644 --- a/src/pip/_internal/self_outdated_check.py +++ b/src/pip/_internal/self_outdated_check.py @@ -9,6 +9,7 @@ from dataclasses import dataclass from typing import Any, Callable, Dict, Optional +from pip._vendor.packaging.version import Version from pip._vendor.packaging.version import parse as parse_version from pip._vendor.rich.console import Group from pip._vendor.rich.markup import escape @@ -17,7 +18,6 @@ from pip._internal.index.collector import LinkCollector from pip._internal.index.package_finder import PackageFinder from pip._internal.metadata import get_default_environment -from pip._internal.metadata.base import DistributionVersion from pip._internal.models.selection_prefs import SelectionPreferences from pip._internal.network.session import PipSession from pip._internal.utils.compat import WINDOWS @@ -191,7 +191,7 @@ def _self_version_check_logic( *, state: SelfCheckState, current_time: datetime.datetime, - local_version: DistributionVersion, + local_version: Version, get_remote_version: Callable[[], Optional[str]], ) -> Optional[UpgradePrompt]: remote_version_str = state.get(current_time) diff --git a/src/pip/_vendor/packaging/__about__.py b/src/pip/_vendor/packaging/__about__.py deleted file mode 100644 index 3551bc2d298..00000000000 --- a/src/pip/_vendor/packaging/__about__.py +++ /dev/null @@ -1,26 +0,0 @@ -# This file is dual licensed under the terms of the Apache License, Version -# 2.0, and the BSD License. See the LICENSE file in the root of this repository -# for complete details. - -__all__ = [ - "__title__", - "__summary__", - "__uri__", - "__version__", - "__author__", - "__email__", - "__license__", - "__copyright__", -] - -__title__ = "packaging" -__summary__ = "Core utilities for Python packages" -__uri__ = "https://github.com/pypa/packaging" - -__version__ = "21.3" - -__author__ = "Donald Stufft and individual contributors" -__email__ = "donald@stufft.io" - -__license__ = "BSD-2-Clause or Apache-2.0" -__copyright__ = "2014-2019 %s" % __author__ diff --git a/src/pip/_vendor/packaging/__init__.py b/src/pip/_vendor/packaging/__init__.py index 3c50c5dcfee..e7c0aa12ca9 100644 --- a/src/pip/_vendor/packaging/__init__.py +++ b/src/pip/_vendor/packaging/__init__.py @@ -2,24 +2,14 @@ # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from .__about__ import ( - __author__, - __copyright__, - __email__, - __license__, - __summary__, - __title__, - __uri__, - __version__, -) +__title__ = "packaging" +__summary__ = "Core utilities for Python packages" +__uri__ = "https://github.com/pypa/packaging" -__all__ = [ - "__title__", - "__summary__", - "__uri__", - "__version__", - "__author__", - "__email__", - "__license__", - "__copyright__", -] +__version__ = "24.0" + +__author__ = "Donald Stufft and individual contributors" +__email__ = "donald@stufft.io" + +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = "2014 %s" % __author__ diff --git a/src/pip/_vendor/packaging/_elffile.py b/src/pip/_vendor/packaging/_elffile.py new file mode 100644 index 00000000000..6fb19b30bb5 --- /dev/null +++ b/src/pip/_vendor/packaging/_elffile.py @@ -0,0 +1,108 @@ +""" +ELF file parser. 
+
+This provides a class ``ELFFile`` that parses an ELF executable in a similar
+interface to ``ZipFile``. Only the read interface is implemented.
+
+Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
+ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
+"""
+
+import enum
+import os
+import struct
+from typing import IO, Optional, Tuple
+
+
+class ELFInvalid(ValueError):
+    pass
+
+
+class EIClass(enum.IntEnum):
+    C32 = 1
+    C64 = 2
+
+
+class EIData(enum.IntEnum):
+    Lsb = 1
+    Msb = 2
+
+
+class EMachine(enum.IntEnum):
+    I386 = 3
+    S390 = 22
+    Arm = 40
+    X8664 = 62
+    AArc64 = 183
+
+
+class ELFFile:
+    """
+    Representation of an ELF executable.
+    """
+
+    def __init__(self, f: IO[bytes]) -> None:
+        self._f = f
+
+        try:
+            ident = self._read("16B")
+        except struct.error:
+            raise ELFInvalid("unable to parse identification")
+        magic = bytes(ident[:4])
+        if magic != b"\x7fELF":
+            raise ELFInvalid(f"invalid magic: {magic!r}")
+
+        self.capacity = ident[4]  # Format for program header (bitness).
+        self.encoding = ident[5]  # Data structure encoding (endianness).
+
+        try:
+            # e_fmt: Format for program header.
+            # p_fmt: Format for section header.
+            # p_idx: Indexes to find p_type, p_offset, and p_filesz.
+            e_fmt, self._p_fmt, self._p_idx = {
+                (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)),  # 32-bit LSB.
+                (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)),  # 32-bit MSB.
+                (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)),  # 64-bit LSB.
+                (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)),  # 64-bit MSB.
+            }[(self.capacity, self.encoding)]
+        except KeyError:
+            raise ELFInvalid(
+                f"unrecognized capacity ({self.capacity}) or "
+                f"encoding ({self.encoding})"
+            )
+
+        try:
+            (
+                _,
+                self.machine,  # Architecture type.
+                _,
+                _,
+                self._e_phoff,  # Offset of program header.
+                _,
+                self.flags,  # Processor-specific flags.
+                _,
+                self._e_phentsize,  # Size of section.
+                self._e_phnum,  # Number of sections.
+            ) = self._read(e_fmt)
+        except struct.error as e:
+            raise ELFInvalid("unable to parse machine and section information") from e
+
+    def _read(self, fmt: str) -> Tuple[int, ...]:
+        return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))
+
+    @property
+    def interpreter(self) -> Optional[str]:
+        """
+        The path recorded in the ``PT_INTERP`` section header.
+        """
+        for index in range(self._e_phnum):
+            self._f.seek(self._e_phoff + self._e_phentsize * index)
+            try:
+                data = self._read(self._p_fmt)
+            except struct.error:
+                continue
+            if data[self._p_idx[0]] != 3:  # Not PT_INTERP.
+                continue
+            self._f.seek(data[self._p_idx[1]])
+            return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
+        return None
diff --git a/src/pip/_vendor/packaging/_manylinux.py b/src/pip/_vendor/packaging/_manylinux.py
index 4c379aa6f69..ad62505f3ff 100644
--- a/src/pip/_vendor/packaging/_manylinux.py
+++ b/src/pip/_vendor/packaging/_manylinux.py
@@ -1,122 +1,70 @@
 import collections
+import contextlib
 import functools
 import os
 import re
-import struct
 import sys
 import warnings
-from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple
-
-
-# Python does not provide platform information at sufficient granularity to
-# identify the architecture of the running executable in some cases, so we
-# determine it dynamically by reading the information from the running
-# process. This only applies on Linux, which uses the ELF format.
-class _ELFFileHeader:
-    # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header
-    class _InvalidELFFileHeader(ValueError):
-        """
-        An invalid ELF file header was found.
- """ - - ELF_MAGIC_NUMBER = 0x7F454C46 - ELFCLASS32 = 1 - ELFCLASS64 = 2 - ELFDATA2LSB = 1 - ELFDATA2MSB = 2 - EM_386 = 3 - EM_S390 = 22 - EM_ARM = 40 - EM_X86_64 = 62 - EF_ARM_ABIMASK = 0xFF000000 - EF_ARM_ABI_VER5 = 0x05000000 - EF_ARM_ABI_FLOAT_HARD = 0x00000400 - - def __init__(self, file: IO[bytes]) -> None: - def unpack(fmt: str) -> int: - try: - data = file.read(struct.calcsize(fmt)) - result: Tuple[int, ...] = struct.unpack(fmt, data) - except struct.error: - raise _ELFFileHeader._InvalidELFFileHeader() - return result[0] - - self.e_ident_magic = unpack(">I") - if self.e_ident_magic != self.ELF_MAGIC_NUMBER: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_class = unpack("B") - if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_data = unpack("B") - if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_version = unpack("B") - self.e_ident_osabi = unpack("B") - self.e_ident_abiversion = unpack("B") - self.e_ident_pad = file.read(7) - format_h = "H" - format_i = "I" - format_q = "Q" - format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q - self.e_type = unpack(format_h) - self.e_machine = unpack(format_h) - self.e_version = unpack(format_i) - self.e_entry = unpack(format_p) - self.e_phoff = unpack(format_p) - self.e_shoff = unpack(format_p) - self.e_flags = unpack(format_i) - self.e_ehsize = unpack(format_h) - self.e_phentsize = unpack(format_h) - self.e_phnum = unpack(format_h) - self.e_shentsize = unpack(format_h) - self.e_shnum = unpack(format_h) - self.e_shstrndx = unpack(format_h) - - -def _get_elf_header() -> Optional[_ELFFileHeader]: +from typing import Dict, Generator, Iterator, NamedTuple, Optional, Sequence, Tuple + +from ._elffile import EIClass, EIData, ELFFile, EMachine + +EF_ARM_ABIMASK = 0xFF000000 +EF_ARM_ABI_VER5 = 0x05000000 +EF_ARM_ABI_FLOAT_HARD = 0x00000400 + + +# `os.PathLike` not a generic type until Python 3.9, so sticking with `str` +# as the type for `path` until then. 
+@contextlib.contextmanager +def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]: try: - with open(sys.executable, "rb") as f: - elf_header = _ELFFileHeader(f) - except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader): - return None - return elf_header + with open(path, "rb") as f: + yield ELFFile(f) + except (OSError, TypeError, ValueError): + yield None -def _is_linux_armhf() -> bool: +def _is_linux_armhf(executable: str) -> bool: # hard-float ABI can be detected from the ELF header of the running # process # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf - elf_header = _get_elf_header() - if elf_header is None: - return False - result = elf_header.e_ident_class == elf_header.ELFCLASS32 - result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB - result &= elf_header.e_machine == elf_header.EM_ARM - result &= ( - elf_header.e_flags & elf_header.EF_ARM_ABIMASK - ) == elf_header.EF_ARM_ABI_VER5 - result &= ( - elf_header.e_flags & elf_header.EF_ARM_ABI_FLOAT_HARD - ) == elf_header.EF_ARM_ABI_FLOAT_HARD - return result - - -def _is_linux_i686() -> bool: - elf_header = _get_elf_header() - if elf_header is None: - return False - result = elf_header.e_ident_class == elf_header.ELFCLASS32 - result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB - result &= elf_header.e_machine == elf_header.EM_386 - return result + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.Arm + and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5 + and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD + ) + + +def _is_linux_i686(executable: str) -> bool: + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.I386 + ) -def _have_compatible_abi(arch: str) -> bool: - if arch == "armv7l": - return _is_linux_armhf() - if arch == "i686": - return _is_linux_i686() - return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"} +def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool: + if "armv7l" in archs: + return _is_linux_armhf(executable) + if "i686" in archs: + return _is_linux_i686(executable) + allowed_archs = { + "x86_64", + "aarch64", + "ppc64", + "ppc64le", + "s390x", + "loongarch64", + "riscv64", + } + return any(arch in allowed_archs for arch in archs) # If glibc ever changes its major version, we need to know what the last @@ -141,10 +89,10 @@ def _glibc_version_string_confstr() -> Optional[str]: # platform module. # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183 try: - # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17". - version_string = os.confstr("CS_GNU_LIBC_VERSION") + # Should be a string like "glibc 2.17". + version_string: Optional[str] = os.confstr("CS_GNU_LIBC_VERSION") assert version_string is not None - _, version = version_string.split() + _, version = version_string.rsplit() except (AssertionError, AttributeError, OSError, ValueError): # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... 
        return None
@@ -211,8 +159,8 @@ def _parse_glibc_version(version_str: str) -> Tuple[int, int]:
     m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
     if not m:
         warnings.warn(
-            "Expected glibc version with 2 components major.minor,"
-            " got: %s" % version_str,
+            f"Expected glibc version with 2 components major.minor,"
+            f" got: {version_str}",
             RuntimeWarning,
         )
         return -1, -1
@@ -228,13 +176,13 @@ def _get_glibc_version() -> Tuple[int, int]:
 
 
 # From PEP 513, PEP 600
-def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool:
+def _is_compatible(arch: str, version: _GLibCVersion) -> bool:
     sys_glibc = _get_glibc_version()
     if sys_glibc < version:
         return False
     # Check for presence of _manylinux module.
     try:
-        import _manylinux  # noqa
+        import _manylinux
     except ImportError:
         return True
     if hasattr(_manylinux, "manylinux_compatible"):
@@ -264,12 +212,22 @@ def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool:
 }
 
 
-def platform_tags(linux: str, arch: str) -> Iterator[str]:
-    if not _have_compatible_abi(arch):
+def platform_tags(archs: Sequence[str]) -> Iterator[str]:
+    """Generate manylinux tags compatible to the current platform.
+
+    :param archs: Sequence of compatible architectures.
+        The first one shall be the closest to the actual architecture and be the part of
+        platform tag after the ``linux_`` prefix, e.g. ``x86_64``.
+        The ``linux_`` prefix is assumed as a prerequisite for the current platform to
+        be manylinux-compatible.
+
+    :returns: An iterator of compatible manylinux tags.
+    """
+    if not _have_compatible_abi(sys.executable, archs):
         return
     # Oldest glibc to be supported regardless of architecture is (2, 17).
     too_old_glibc2 = _GLibCVersion(2, 16)
-    if arch in {"x86_64", "i686"}:
+    if set(archs) & {"x86_64", "i686"}:
         # On x86/i686 also oldest glibc to be supported is (2, 5).
         too_old_glibc2 = _GLibCVersion(2, 4)
     current_glibc = _GLibCVersion(*_get_glibc_version())
@@ -283,19 +241,20 @@ def platform_tags(linux: str, arch: str) -> Iterator[str]:
     for glibc_major in range(current_glibc.major - 1, 1, -1):
         glibc_minor = _LAST_GLIBC_MINOR[glibc_major]
         glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor))
-    for glibc_max in glibc_max_list:
-        if glibc_max.major == too_old_glibc2.major:
-            min_minor = too_old_glibc2.minor
-        else:
-            # For other glibc major versions oldest supported is (x, 0).
-            min_minor = -1
-        for glibc_minor in range(glibc_max.minor, min_minor, -1):
-            glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
-            tag = "manylinux_{}_{}".format(*glibc_version)
-            if _is_compatible(tag, arch, glibc_version):
-                yield linux.replace("linux", tag)
-            # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
-            if glibc_version in _LEGACY_MANYLINUX_MAP:
-                legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
-                if _is_compatible(legacy_tag, arch, glibc_version):
-                    yield linux.replace("linux", legacy_tag)
+    for arch in archs:
+        for glibc_max in glibc_max_list:
+            if glibc_max.major == too_old_glibc2.major:
+                min_minor = too_old_glibc2.minor
+            else:
+                # For other glibc major versions oldest supported is (x, 0).
+                min_minor = -1
+            for glibc_minor in range(glibc_max.minor, min_minor, -1):
+                glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
+                tag = "manylinux_{}_{}".format(*glibc_version)
+                if _is_compatible(arch, glibc_version):
+                    yield f"{tag}_{arch}"
+                # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
+ if glibc_version in _LEGACY_MANYLINUX_MAP: + legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] + if _is_compatible(arch, glibc_version): + yield f"{legacy_tag}_{arch}" diff --git a/src/pip/_vendor/packaging/_musllinux.py b/src/pip/_vendor/packaging/_musllinux.py index 8ac3059ba3c..86419df9d70 100644 --- a/src/pip/_vendor/packaging/_musllinux.py +++ b/src/pip/_vendor/packaging/_musllinux.py @@ -4,68 +4,13 @@ linked against musl, and what musl version is used. """ -import contextlib import functools -import operator -import os import re -import struct import subprocess import sys -from typing import IO, Iterator, NamedTuple, Optional, Tuple +from typing import Iterator, NamedTuple, Optional, Sequence - -def _read_unpacked(f: IO[bytes], fmt: str) -> Tuple[int, ...]: - return struct.unpack(fmt, f.read(struct.calcsize(fmt))) - - -def _parse_ld_musl_from_elf(f: IO[bytes]) -> Optional[str]: - """Detect musl libc location by parsing the Python executable. - - Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca - ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html - """ - f.seek(0) - try: - ident = _read_unpacked(f, "16B") - except struct.error: - return None - if ident[:4] != tuple(b"\x7fELF"): # Invalid magic, not ELF. - return None - f.seek(struct.calcsize("HHI"), 1) # Skip file type, machine, and version. - - try: - # e_fmt: Format for program header. - # p_fmt: Format for section header. - # p_idx: Indexes to find p_type, p_offset, and p_filesz. - e_fmt, p_fmt, p_idx = { - 1: ("IIIIHHH", "IIIIIIII", (0, 1, 4)), # 32-bit. - 2: ("QQQIHHH", "IIQQQQQQ", (0, 2, 5)), # 64-bit. - }[ident[4]] - except KeyError: - return None - else: - p_get = operator.itemgetter(*p_idx) - - # Find the interpreter section and return its content. - try: - _, e_phoff, _, _, _, e_phentsize, e_phnum = _read_unpacked(f, e_fmt) - except struct.error: - return None - for i in range(e_phnum + 1): - f.seek(e_phoff + e_phentsize * i) - try: - p_type, p_offset, p_filesz = p_get(_read_unpacked(f, p_fmt)) - except struct.error: - return None - if p_type != 3: # Not PT_INTERP. - continue - f.seek(p_offset) - interpreter = os.fsdecode(f.read(p_filesz)).strip("\0") - if "musl" not in interpreter: - return None - return interpreter - return None +from ._elffile import ELFFile class _MuslVersion(NamedTuple): @@ -95,32 +40,34 @@ def _get_musl_version(executable: str) -> Optional[_MuslVersion]: Version 1.2.2 Dynamic Program Loader """ - with contextlib.ExitStack() as stack: - try: - f = stack.enter_context(open(executable, "rb")) - except OSError: - return None - ld = _parse_ld_musl_from_elf(f) - if not ld: + try: + with open(executable, "rb") as f: + ld = ELFFile(f).interpreter + except (OSError, TypeError, ValueError): + return None + if ld is None or "musl" not in ld: return None - proc = subprocess.run([ld], stderr=subprocess.PIPE, universal_newlines=True) + proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True) return _parse_musl_version(proc.stderr) -def platform_tags(arch: str) -> Iterator[str]: +def platform_tags(archs: Sequence[str]) -> Iterator[str]: """Generate musllinux tags compatible to the current platform. - :param arch: Should be the part of platform tag after the ``linux_`` - prefix, e.g. ``x86_64``. The ``linux_`` prefix is assumed as a - prerequisite for the current platform to be musllinux-compatible. + :param archs: Sequence of compatible architectures. 
+ The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be musllinux-compatible. :returns: An iterator of compatible musllinux tags. """ sys_musl = _get_musl_version(sys.executable) if sys_musl is None: # Python not dynamically linked against musl. return - for minor in range(sys_musl.minor, -1, -1): - yield f"musllinux_{sys_musl.major}_{minor}_{arch}" + for arch in archs: + for minor in range(sys_musl.minor, -1, -1): + yield f"musllinux_{sys_musl.major}_{minor}_{arch}" if __name__ == "__main__": # pragma: no cover diff --git a/src/pip/_vendor/packaging/_parser.py b/src/pip/_vendor/packaging/_parser.py new file mode 100644 index 00000000000..684df75457c --- /dev/null +++ b/src/pip/_vendor/packaging/_parser.py @@ -0,0 +1,356 @@ +"""Handwritten parser of dependency specifiers. + +The docstring for each __parse_* function contains ENBF-inspired grammar representing +the implementation. +""" + +import ast +from typing import Any, List, NamedTuple, Optional, Tuple, Union + +from ._tokenizer import DEFAULT_RULES, Tokenizer + + +class Node: + def __init__(self, value: str) -> None: + self.value = value + + def __str__(self) -> str: + return self.value + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" + + def serialize(self) -> str: + raise NotImplementedError + + +class Variable(Node): + def serialize(self) -> str: + return str(self) + + +class Value(Node): + def serialize(self) -> str: + return f'"{self}"' + + +class Op(Node): + def serialize(self) -> str: + return str(self) + + +MarkerVar = Union[Variable, Value] +MarkerItem = Tuple[MarkerVar, Op, MarkerVar] +# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]] +# MarkerList = List[Union["MarkerList", MarkerAtom, str]] +# mypy does not support recursive type definition +# https://github.com/python/mypy/issues/731 +MarkerAtom = Any +MarkerList = List[Any] + + +class ParsedRequirement(NamedTuple): + name: str + url: str + extras: List[str] + specifier: str + marker: Optional[MarkerList] + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for dependency specifier +# -------------------------------------------------------------------------------------- +def parse_requirement(source: str) -> ParsedRequirement: + return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement: + """ + requirement = WS? IDENTIFIER WS? extras WS? requirement_details + """ + tokenizer.consume("WS") + + name_token = tokenizer.expect( + "IDENTIFIER", expected="package name at the start of dependency specifier" + ) + name = name_token.text + tokenizer.consume("WS") + + extras = _parse_extras(tokenizer) + tokenizer.consume("WS") + + url, specifier, marker = _parse_requirement_details(tokenizer) + tokenizer.expect("END", expected="end of dependency specifier") + + return ParsedRequirement(name, url, extras, specifier, marker) + + +def _parse_requirement_details( + tokenizer: Tokenizer, +) -> Tuple[str, str, Optional[MarkerList]]: + """ + requirement_details = AT URL (WS requirement_marker?)? + | specifier WS? (requirement_marker)? 
+ """ + + specifier = "" + url = "" + marker = None + + if tokenizer.check("AT"): + tokenizer.read() + tokenizer.consume("WS") + + url_start = tokenizer.position + url = tokenizer.expect("URL", expected="URL after @").text + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + tokenizer.expect("WS", expected="whitespace after URL") + + # The input might end after whitespace. + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, span_start=url_start, after="URL and whitespace" + ) + else: + specifier_start = tokenizer.position + specifier = _parse_specifier(tokenizer) + tokenizer.consume("WS") + + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, + span_start=specifier_start, + after=( + "version specifier" + if specifier + else "name and no valid version specifier" + ), + ) + + return (url, specifier, marker) + + +def _parse_requirement_marker( + tokenizer: Tokenizer, *, span_start: int, after: str +) -> MarkerList: + """ + requirement_marker = SEMICOLON marker WS? + """ + + if not tokenizer.check("SEMICOLON"): + tokenizer.raise_syntax_error( + f"Expected end or semicolon (after {after})", + span_start=span_start, + ) + tokenizer.read() + + marker = _parse_marker(tokenizer) + tokenizer.consume("WS") + + return marker + + +def _parse_extras(tokenizer: Tokenizer) -> List[str]: + """ + extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)? + """ + if not tokenizer.check("LEFT_BRACKET", peek=True): + return [] + + with tokenizer.enclosing_tokens( + "LEFT_BRACKET", + "RIGHT_BRACKET", + around="extras", + ): + tokenizer.consume("WS") + extras = _parse_extras_list(tokenizer) + tokenizer.consume("WS") + + return extras + + +def _parse_extras_list(tokenizer: Tokenizer) -> List[str]: + """ + extras_list = identifier (wsp* ',' wsp* identifier)* + """ + extras: List[str] = [] + + if not tokenizer.check("IDENTIFIER"): + return extras + + extras.append(tokenizer.read().text) + + while True: + tokenizer.consume("WS") + if tokenizer.check("IDENTIFIER", peek=True): + tokenizer.raise_syntax_error("Expected comma between extra names") + elif not tokenizer.check("COMMA"): + break + + tokenizer.read() + tokenizer.consume("WS") + + extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma") + extras.append(extra_token.text) + + return extras + + +def _parse_specifier(tokenizer: Tokenizer) -> str: + """ + specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS + | WS? version_many WS? + """ + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="version specifier", + ): + tokenizer.consume("WS") + parsed_specifiers = _parse_version_many(tokenizer) + tokenizer.consume("WS") + + return parsed_specifiers + + +def _parse_version_many(tokenizer: Tokenizer) -> str: + """ + version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)? 
+ """ + parsed_specifiers = "" + while tokenizer.check("SPECIFIER"): + span_start = tokenizer.position + parsed_specifiers += tokenizer.read().text + if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True): + tokenizer.raise_syntax_error( + ".* suffix can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position + 1, + ) + if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True): + tokenizer.raise_syntax_error( + "Local version label can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position, + ) + tokenizer.consume("WS") + if not tokenizer.check("COMMA"): + break + parsed_specifiers += tokenizer.read().text + tokenizer.consume("WS") + + return parsed_specifiers + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for marker expression +# -------------------------------------------------------------------------------------- +def parse_marker(source: str) -> MarkerList: + return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList: + retval = _parse_marker(tokenizer) + tokenizer.expect("END", expected="end of marker expression") + return retval + + +def _parse_marker(tokenizer: Tokenizer) -> MarkerList: + """ + marker = marker_atom (BOOLOP marker_atom)+ + """ + expression = [_parse_marker_atom(tokenizer)] + while tokenizer.check("BOOLOP"): + token = tokenizer.read() + expr_right = _parse_marker_atom(tokenizer) + expression.extend((token.text, expr_right)) + return expression + + +def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom: + """ + marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS? + | WS? marker_item WS? + """ + + tokenizer.consume("WS") + if tokenizer.check("LEFT_PARENTHESIS", peek=True): + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="marker expression", + ): + tokenizer.consume("WS") + marker: MarkerAtom = _parse_marker(tokenizer) + tokenizer.consume("WS") + else: + marker = _parse_marker_item(tokenizer) + tokenizer.consume("WS") + return marker + + +def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem: + """ + marker_item = WS? marker_var WS? marker_op WS? marker_var WS? 
+ """ + tokenizer.consume("WS") + marker_var_left = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + marker_op = _parse_marker_op(tokenizer) + tokenizer.consume("WS") + marker_var_right = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + return (marker_var_left, marker_op, marker_var_right) + + +def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar: + """ + marker_var = VARIABLE | QUOTED_STRING + """ + if tokenizer.check("VARIABLE"): + return process_env_var(tokenizer.read().text.replace(".", "_")) + elif tokenizer.check("QUOTED_STRING"): + return process_python_str(tokenizer.read().text) + else: + tokenizer.raise_syntax_error( + message="Expected a marker variable or quoted string" + ) + + +def process_env_var(env_var: str) -> Variable: + if env_var in ("platform_python_implementation", "python_implementation"): + return Variable("platform_python_implementation") + else: + return Variable(env_var) + + +def process_python_str(python_str: str) -> Value: + value = ast.literal_eval(python_str) + return Value(str(value)) + + +def _parse_marker_op(tokenizer: Tokenizer) -> Op: + """ + marker_op = IN | NOT IN | OP + """ + if tokenizer.check("IN"): + tokenizer.read() + return Op("in") + elif tokenizer.check("NOT"): + tokenizer.read() + tokenizer.expect("WS", expected="whitespace after 'not'") + tokenizer.expect("IN", expected="'in' after 'not'") + return Op("not in") + elif tokenizer.check("OP"): + return Op(tokenizer.read().text) + else: + return tokenizer.raise_syntax_error( + "Expected marker operator, one of " + "<=, <, !=, ==, >=, >, ~=, ===, in, not in" + ) diff --git a/src/pip/_vendor/packaging/_tokenizer.py b/src/pip/_vendor/packaging/_tokenizer.py new file mode 100644 index 00000000000..dd0d648d49a --- /dev/null +++ b/src/pip/_vendor/packaging/_tokenizer.py @@ -0,0 +1,192 @@ +import contextlib +import re +from dataclasses import dataclass +from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union + +from .specifiers import Specifier + + +@dataclass +class Token: + name: str + text: str + position: int + + +class ParserSyntaxError(Exception): + """The provided source text could not be parsed correctly.""" + + def __init__( + self, + message: str, + *, + source: str, + span: Tuple[int, int], + ) -> None: + self.span = span + self.message = message + self.source = source + + super().__init__() + + def __str__(self) -> str: + marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^" + return "\n ".join([self.message, self.source, marker]) + + +DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = { + "LEFT_PARENTHESIS": r"\(", + "RIGHT_PARENTHESIS": r"\)", + "LEFT_BRACKET": r"\[", + "RIGHT_BRACKET": r"\]", + "SEMICOLON": r";", + "COMMA": r",", + "QUOTED_STRING": re.compile( + r""" + ( + ('[^']*') + | + ("[^"]*") + ) + """, + re.VERBOSE, + ), + "OP": r"(===|==|~=|!=|<=|>=|<|>)", + "BOOLOP": r"\b(or|and)\b", + "IN": r"\bin\b", + "NOT": r"\bnot\b", + "VARIABLE": re.compile( + r""" + \b( + python_version + |python_full_version + |os[._]name + |sys[._]platform + |platform_(release|system) + |platform[._](version|machine|python_implementation) + |python_implementation + |implementation_(name|version) + |extra + )\b + """, + re.VERBOSE, + ), + "SPECIFIER": re.compile( + Specifier._operator_regex_str + Specifier._version_regex_str, + re.VERBOSE | re.IGNORECASE, + ), + "AT": r"\@", + "URL": r"[^ \t]+", + "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b", + "VERSION_PREFIX_TRAIL": r"\.\*", + "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*", + 
"WS": r"[ \t]+", + "END": r"$", +} + + +class Tokenizer: + """Context-sensitive token parsing. + + Provides methods to examine the input stream to check whether the next token + matches. + """ + + def __init__( + self, + source: str, + *, + rules: "Dict[str, Union[str, re.Pattern[str]]]", + ) -> None: + self.source = source + self.rules: Dict[str, re.Pattern[str]] = { + name: re.compile(pattern) for name, pattern in rules.items() + } + self.next_token: Optional[Token] = None + self.position = 0 + + def consume(self, name: str) -> None: + """Move beyond provided token name, if at current position.""" + if self.check(name): + self.read() + + def check(self, name: str, *, peek: bool = False) -> bool: + """Check whether the next token has the provided name. + + By default, if the check succeeds, the token *must* be read before + another check. If `peek` is set to `True`, the token is not loaded and + would need to be checked again. + """ + assert ( + self.next_token is None + ), f"Cannot check for {name!r}, already have {self.next_token!r}" + assert name in self.rules, f"Unknown token name: {name!r}" + + expression = self.rules[name] + + match = expression.match(self.source, self.position) + if match is None: + return False + if not peek: + self.next_token = Token(name, match[0], self.position) + return True + + def expect(self, name: str, *, expected: str) -> Token: + """Expect a certain token name next, failing with a syntax error otherwise. + + The token is *not* read. + """ + if not self.check(name): + raise self.raise_syntax_error(f"Expected {expected}") + return self.read() + + def read(self) -> Token: + """Consume the next token and return it.""" + token = self.next_token + assert token is not None + + self.position += len(token.text) + self.next_token = None + + return token + + def raise_syntax_error( + self, + message: str, + *, + span_start: Optional[int] = None, + span_end: Optional[int] = None, + ) -> NoReturn: + """Raise ParserSyntaxError at the given position.""" + span = ( + self.position if span_start is None else span_start, + self.position if span_end is None else span_end, + ) + raise ParserSyntaxError( + message, + source=self.source, + span=span, + ) + + @contextlib.contextmanager + def enclosing_tokens( + self, open_token: str, close_token: str, *, around: str + ) -> Iterator[None]: + if self.check(open_token): + open_position = self.position + self.read() + else: + open_position = None + + yield + + if open_position is None: + return + + if not self.check(close_token): + self.raise_syntax_error( + f"Expected matching {close_token} for {open_token}, after {around}", + span_start=open_position, + ) + + self.read() diff --git a/src/pip/_vendor/packaging/markers.py b/src/pip/_vendor/packaging/markers.py index 540e7a4dc79..8b98fca7233 100644 --- a/src/pip/_vendor/packaging/markers.py +++ b/src/pip/_vendor/packaging/markers.py @@ -8,19 +8,17 @@ import sys from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from pip._vendor.pyparsing import ( # noqa: N817 - Forward, - Group, - Literal as L, - ParseException, - ParseResults, - QuotedString, - ZeroOrMore, - stringEnd, - stringStart, +from ._parser import ( + MarkerAtom, + MarkerList, + Op, + Value, + Variable, + parse_marker as _parse_marker, ) - +from ._tokenizer import ParserSyntaxError from .specifiers import InvalidSpecifier, Specifier +from .utils import canonicalize_name __all__ = [ "InvalidMarker", @@ -52,101 +50,24 @@ class UndefinedEnvironmentName(ValueError): """ -class Node: - def __init__(self, 
value: Any) -> None: - self.value = value - - def __str__(self) -> str: - return str(self.value) - - def __repr__(self) -> str: - return f"<{self.__class__.__name__}('{self}')>" - - def serialize(self) -> str: - raise NotImplementedError - - -class Variable(Node): - def serialize(self) -> str: - return str(self) - - -class Value(Node): - def serialize(self) -> str: - return f'"{self}"' - - -class Op(Node): - def serialize(self) -> str: - return str(self) - - -VARIABLE = ( - L("implementation_version") - | L("platform_python_implementation") - | L("implementation_name") - | L("python_full_version") - | L("platform_release") - | L("platform_version") - | L("platform_machine") - | L("platform_system") - | L("python_version") - | L("sys_platform") - | L("os_name") - | L("os.name") # PEP-345 - | L("sys.platform") # PEP-345 - | L("platform.version") # PEP-345 - | L("platform.machine") # PEP-345 - | L("platform.python_implementation") # PEP-345 - | L("python_implementation") # undocumented setuptools legacy - | L("extra") # PEP-508 -) -ALIASES = { - "os.name": "os_name", - "sys.platform": "sys_platform", - "platform.version": "platform_version", - "platform.machine": "platform_machine", - "platform.python_implementation": "platform_python_implementation", - "python_implementation": "platform_python_implementation", -} -VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0]))) - -VERSION_CMP = ( - L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<") -) - -MARKER_OP = VERSION_CMP | L("not in") | L("in") -MARKER_OP.setParseAction(lambda s, l, t: Op(t[0])) - -MARKER_VALUE = QuotedString("'") | QuotedString('"') -MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0])) - -BOOLOP = L("and") | L("or") - -MARKER_VAR = VARIABLE | MARKER_VALUE - -MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR) -MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0])) - -LPAREN = L("(").suppress() -RPAREN = L(")").suppress() - -MARKER_EXPR = Forward() -MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN) -MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR) - -MARKER = stringStart + MARKER_EXPR + stringEnd - - -def _coerce_parse_result(results: Union[ParseResults, List[Any]]) -> List[Any]: - if isinstance(results, ParseResults): - return [_coerce_parse_result(i) for i in results] - else: - return results +def _normalize_extra_values(results: Any) -> Any: + """ + Normalize extra values. 
+ """ + if isinstance(results[0], tuple): + lhs, op, rhs = results[0] + if isinstance(lhs, Variable) and lhs.value == "extra": + normalized_extra = canonicalize_name(rhs.value) + rhs = Value(normalized_extra) + elif isinstance(rhs, Variable) and rhs.value == "extra": + normalized_extra = canonicalize_name(lhs.value) + lhs = Value(normalized_extra) + results[0] = lhs, op, rhs + return results def _format_marker( - marker: Union[List[str], Tuple[Node, ...], str], first: Optional[bool] = True + marker: Union[List[str], MarkerAtom, str], first: Optional[bool] = True ) -> str: assert isinstance(marker, (list, tuple, str)) @@ -192,7 +113,7 @@ def _eval_op(lhs: str, op: Op, rhs: str) -> bool: except InvalidSpecifier: pass else: - return spec.contains(lhs) + return spec.contains(lhs, prereleases=True) oper: Optional[Operator] = _operators.get(op.serialize()) if oper is None: @@ -201,25 +122,19 @@ def _eval_op(lhs: str, op: Op, rhs: str) -> bool: return oper(lhs, rhs) -class Undefined: - pass - +def _normalize(*values: str, key: str) -> Tuple[str, ...]: + # PEP 685 – Comparison of extra names for optional distribution dependencies + # https://peps.python.org/pep-0685/ + # > When comparing extra names, tools MUST normalize the names being + # > compared using the semantics outlined in PEP 503 for names + if key == "extra": + return tuple(canonicalize_name(v) for v in values) -_undefined = Undefined() + # other environment markers don't have such standards + return values -def _get_env(environment: Dict[str, str], name: str) -> str: - value: Union[str, Undefined] = environment.get(name, _undefined) - - if isinstance(value, Undefined): - raise UndefinedEnvironmentName( - f"{name!r} does not exist in evaluation environment." - ) - - return value - - -def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool: +def _evaluate_markers(markers: MarkerList, environment: Dict[str, str]) -> bool: groups: List[List[bool]] = [[]] for marker in markers: @@ -231,12 +146,15 @@ def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool: lhs, op, rhs = marker if isinstance(lhs, Variable): - lhs_value = _get_env(environment, lhs.value) + environment_key = lhs.value + lhs_value = environment[environment_key] rhs_value = rhs.value else: lhs_value = lhs.value - rhs_value = _get_env(environment, rhs.value) + environment_key = rhs.value + rhs_value = environment[environment_key] + lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key) groups[-1].append(_eval_op(lhs_value, op, rhs_value)) else: assert marker in ["and", "or"] @@ -274,13 +192,29 @@ def default_environment() -> Dict[str, str]: class Marker: def __init__(self, marker: str) -> None: + # Note: We create a Marker object without calling this constructor in + # packaging.requirements.Requirement. If any additional logic is + # added here, make sure to mirror/adapt Requirement. 
try: - self._markers = _coerce_parse_result(MARKER.parseString(marker)) - except ParseException as e: - raise InvalidMarker( - f"Invalid marker: {marker!r}, parse error at " - f"{marker[e.loc : e.loc + 8]!r}" - ) + self._markers = _normalize_extra_values(_parse_marker(marker)) + # The attribute `_markers` can be described in terms of a recursive type: + # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]] + # + # For example, the following expression: + # python_version > "3.6" or (python_version == "3.6" and os_name == "unix") + # + # is parsed into: + # [ + # (<Variable('python_version')>, <Op('>')>, <Value('3.6')>), + # 'and', + # [ + # (<Variable('python_version')>, <Op('==')>, <Value('3.6')>), + # 'or', + # (<Variable('os_name')>, <Op('==')>, <Value('unix')>) + # ] + # ] + except ParserSyntaxError as e: + raise InvalidMarker(str(e)) from e def __str__(self) -> str: return _format_marker(self._markers) @@ -288,6 +222,15 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"<Marker('{self}')>" + def __hash__(self) -> int: + return hash((self.__class__.__name__, str(self))) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Marker): + return NotImplemented + + return str(self) == str(other) + def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: """Evaluate a marker. @@ -298,7 +241,12 @@ def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: The environment is determined from the current Python process. """ current_environment = default_environment() + current_environment["extra"] = "" if environment is not None: current_environment.update(environment) + # The API used to allow setting extra to None. We need to handle this + # case for backwards compatibility. + if current_environment["extra"] is None: + current_environment["extra"] = "" return _evaluate_markers(self._markers, current_environment) diff --git a/src/pip/_vendor/packaging/metadata.py b/src/pip/_vendor/packaging/metadata.py new file mode 100644 index 00000000000..5e4c106c549 --- /dev/null +++ b/src/pip/_vendor/packaging/metadata.py @@ -0,0 +1,825 @@ +import email.feedparser +import email.header +import email.message +import email.parser +import email.policy +import sys +import typing +from typing import ( + Any, + Callable, + Dict, + Generic, + List, + Optional, + Tuple, + Type, + Union, + cast, +) + +from . import requirements, specifiers, utils, version as version_module + +T = typing.TypeVar("T") +if sys.version_info[:2] >= (3, 8): # pragma: no cover + from typing import Literal, TypedDict +else: # pragma: no cover + if typing.TYPE_CHECKING: + from pip._vendor.typing_extensions import Literal, TypedDict + else: + try: + from pip._vendor.typing_extensions import Literal, TypedDict + except ImportError: + + class Literal: + def __init_subclass__(*_args, **_kwargs): + pass + + class TypedDict: + def __init_subclass__(*_args, **_kwargs): + pass + + +try: + ExceptionGroup +except NameError: # pragma: no cover + + class ExceptionGroup(Exception): # noqa: N818 + """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11. + + If :external:exc:`ExceptionGroup` is already defined by Python itself, + that version is used instead.
+ """ + + message: str + exceptions: List[Exception] + + def __init__(self, message: str, exceptions: List[Exception]) -> None: + self.message = message + self.exceptions = exceptions + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})" + +else: # pragma: no cover + ExceptionGroup = ExceptionGroup + + +class InvalidMetadata(ValueError): + """A metadata field contains invalid data.""" + + field: str + """The name of the field that contains invalid data.""" + + def __init__(self, field: str, message: str) -> None: + self.field = field + super().__init__(message) + + +# The RawMetadata class attempts to make as few assumptions about the underlying +# serialization formats as possible. The idea is that as long as a serialization +# formats offer some very basic primitives in *some* way then we can support +# serializing to and from that format. +class RawMetadata(TypedDict, total=False): + """A dictionary of raw core metadata. + + Each field in core metadata maps to a key of this dictionary (when data is + provided). The key is lower-case and underscores are used instead of dashes + compared to the equivalent core metadata field. Any core metadata field that + can be specified multiple times or can hold multiple values in a single + field have a key with a plural name. See :class:`Metadata` whose attributes + match the keys of this dictionary. + + Core metadata fields that can be specified multiple times are stored as a + list or dict depending on which is appropriate for the field. Any fields + which hold multiple values in a single field are stored as a list. + + """ + + # Metadata 1.0 - PEP 241 + metadata_version: str + name: str + version: str + platforms: List[str] + summary: str + description: str + keywords: List[str] + home_page: str + author: str + author_email: str + license: str + + # Metadata 1.1 - PEP 314 + supported_platforms: List[str] + download_url: str + classifiers: List[str] + requires: List[str] + provides: List[str] + obsoletes: List[str] + + # Metadata 1.2 - PEP 345 + maintainer: str + maintainer_email: str + requires_dist: List[str] + provides_dist: List[str] + obsoletes_dist: List[str] + requires_python: str + requires_external: List[str] + project_urls: Dict[str, str] + + # Metadata 2.0 + # PEP 426 attempted to completely revamp the metadata format + # but got stuck without ever being able to build consensus on + # it and ultimately ended up withdrawn. + # + # However, a number of tools had started emitting METADATA with + # `2.0` Metadata-Version, so for historical reasons, this version + # was skipped. + + # Metadata 2.1 - PEP 566 + description_content_type: str + provides_extra: List[str] + + # Metadata 2.2 - PEP 643 + dynamic: List[str] + + # Metadata 2.3 - PEP 685 + # No new fields were added in PEP 685, just some edge case were + # tightened up to provide better interoptability. 
+ + +_STRING_FIELDS = { + "author", + "author_email", + "description", + "description_content_type", + "download_url", + "home_page", + "license", + "maintainer", + "maintainer_email", + "metadata_version", + "name", + "requires_python", + "summary", + "version", +} + +_LIST_FIELDS = { + "classifiers", + "dynamic", + "obsoletes", + "obsoletes_dist", + "platforms", + "provides", + "provides_dist", + "provides_extra", + "requires", + "requires_dist", + "requires_external", + "supported_platforms", +} + +_DICT_FIELDS = { + "project_urls", +} + + +def _parse_keywords(data: str) -> List[str]: + """Split a string of comma-separated keywords into a list of keywords.""" + return [k.strip() for k in data.split(",")] + + +def _parse_project_urls(data: List[str]) -> Dict[str, str]: + """Parse a list of label/URL string pairings separated by a comma.""" + urls = {} + for pair in data: + # Our logic is slightly tricky here as we want to try and do + # *something* reasonable with malformed data. + # + # The main thing that we have to worry about is data that does + # not have a ',' at all to split the label from the value. There + # isn't a singular right answer here, and we will fail validation + # later on (if the caller is validating) so it doesn't *really* + # matter, but since the missing value has to be an empty str + # and our return value is dict[str, str], if we let the key + # be the missing value, then they'd have multiple '' values that + # overwrite each other in an accumulating dict. + # + # The other potential issue is that it's possible to have the + # same label multiple times in the metadata, with no solid "right" + # answer for what to do in that case. As such, we'll do the only + # thing we can, which is treat the field as unparseable and add it + # to our list of unparsed fields. + parts = [p.strip() for p in pair.split(",", 1)] + parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items + + # TODO: The spec doesn't say anything about whether the keys should be + # considered case sensitive or not... logically they should + # be case-preserving and case-insensitive, but doing that + # would open up more cases where we might have duplicate + # entries. + label, url = parts + if label in urls: + # The label already exists in our set of urls, so this field + # is unparseable, and we can just add the whole thing to our + # unparseable data and stop processing it. + raise KeyError("duplicate labels in project urls") + urls[label] = url + + return urls + + +def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str: + """Get the body of the message.""" + # If our source is a str, then our caller has managed encodings for us, + # and we don't need to deal with it. + if isinstance(source, str): + payload: str = msg.get_payload() + return payload + # If our source is a bytes, then we're managing the encoding and we need + # to deal with it. + else: + bpayload: bytes = msg.get_payload(decode=True) + try: + return bpayload.decode("utf8", "strict") + except UnicodeDecodeError: + raise ValueError("payload in an invalid encoding") + + +# The various parse_FORMAT functions here are intended to be as lenient as +# possible in their parsing, while still returning a correctly typed +# RawMetadata. +# +# To aid in this, we also generally want to do as little touching of the +# data as possible, except where there are possibly some historic holdovers +# that make valid data awkward to work with.
+# +# While this is a lower level, intermediate format than our ``Metadata`` +# class, some light touch ups can make a massive difference in usability. + +# Map METADATA fields to RawMetadata. +_EMAIL_TO_RAW_MAPPING = { + "author": "author", + "author-email": "author_email", + "classifier": "classifiers", + "description": "description", + "description-content-type": "description_content_type", + "download-url": "download_url", + "dynamic": "dynamic", + "home-page": "home_page", + "keywords": "keywords", + "license": "license", + "maintainer": "maintainer", + "maintainer-email": "maintainer_email", + "metadata-version": "metadata_version", + "name": "name", + "obsoletes": "obsoletes", + "obsoletes-dist": "obsoletes_dist", + "platform": "platforms", + "project-url": "project_urls", + "provides": "provides", + "provides-dist": "provides_dist", + "provides-extra": "provides_extra", + "requires": "requires", + "requires-dist": "requires_dist", + "requires-external": "requires_external", + "requires-python": "requires_python", + "summary": "summary", + "supported-platform": "supported_platforms", + "version": "version", +} +_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()} + + +def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]: + """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``). + + This function returns a two-item tuple of dicts. The first dict is of + recognized fields from the core metadata specification. Fields that can be + parsed and translated into Python's built-in types are converted + appropriately. All other fields are left as-is. Fields that are allowed to + appear multiple times are stored as lists. + + The second dict contains all other fields from the metadata. This includes + any unrecognized fields. It also includes any fields which are expected to + be parsed into a built-in type but were not formatted appropriately. Finally, + any fields that are expected to appear only once but are repeated are + included in this dict. + + """ + raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {} + unparsed: Dict[str, List[str]] = {} + + if isinstance(data, str): + parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data) + else: + parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data) + + # We have to wrap parsed.keys() in a set, because in the case of multiple + # values for a key (a list), the key will appear multiple times in the + # list of keys, but we're avoiding that by using get_all(). + for name in frozenset(parsed.keys()): + # Header names in RFC are case insensitive, so we'll normalize to all + # lower case to make comparisons easier. + name = name.lower() + + # We use get_all() here, even for fields that aren't multiple use, + # because otherwise someone could have e.g. two Name fields, and we + # would just silently ignore it rather than doing something about it. + headers = parsed.get_all(name) or [] + + # The way the email module works when parsing bytes is that it + # unconditionally decodes the bytes as ascii using the surrogateescape + # handler. When you pull that data back out (such as with get_all() ), + # it looks to see if the str has any surrogate escapes, and if it does + # it wraps it in a Header object instead of returning the string. + # + # As such, we'll look for those Header objects, and fix up the encoding. 
+ value = [] + # Flag if we have run into any issues processing the headers, thus + # signalling that the data belongs in 'unparsed'. + valid_encoding = True + for h in headers: + # It's unclear if this can return more types than just a Header or + # a str, so we'll just assert here to make sure. + assert isinstance(h, (email.header.Header, str)) + + # If it's a header object, we need to do our little dance to get + # the real data out of it. In cases where there is invalid data + # we're going to end up with mojibake, but there's no obvious, good + # way around that without reimplementing parts of the Header object + # ourselves. + # + # That should be fine since, if mojibake happens, this key is + # going into the unparsed dict anyways. + if isinstance(h, email.header.Header): + # The Header object stores its data as chunks, and each chunk + # can be independently encoded, so we'll need to check each + # of them. + chunks: List[Tuple[bytes, Optional[str]]] = [] + for bin, encoding in email.header.decode_header(h): + try: + bin.decode("utf8", "strict") + except UnicodeDecodeError: + # Enable mojibake. + encoding = "latin1" + valid_encoding = False + else: + encoding = "utf8" + chunks.append((bin, encoding)) + + # Turn our chunks back into a Header object, then let that + # Header object do the right thing to turn them into a + # string for us. + value.append(str(email.header.make_header(chunks))) + # This is already a string, so just add it. + else: + value.append(h) + + # We've processed all of our values to get them into a list of str, + # but we may have mojibake data, in which case this is an unparsed + # field. + if not valid_encoding: + unparsed[name] = value + continue + + raw_name = _EMAIL_TO_RAW_MAPPING.get(name) + if raw_name is None: + # This is a bit of a weird situation: we've encountered a key that + # we don't know the meaning of, so we don't know whether it's meant + # to be a list or not. + # + # Since we can't really tell one way or another, we'll just leave it + # as a list, even though it may be a single item list, because that's + # what makes the most sense for email headers. + unparsed[name] = value + continue + + # If this is one of our string fields, then we'll check to see if our + # value is a list of a single item. If it is then we'll assume that + # it was emitted as a single string, and unwrap the str from inside + # the list. + # + # If it's any other kind of data, then we haven't the faintest clue + # what we should parse it as, and we have to just add it to our list + # of unparsed stuff. + if raw_name in _STRING_FIELDS and len(value) == 1: + raw[raw_name] = value[0] + # If this is one of our list of string fields, then we can just assign + # the value, since email *only* has strings, and our get_all() call + # above ensures that this is a list. + elif raw_name in _LIST_FIELDS: + raw[raw_name] = value + # Special Case: Keywords + # The keywords field is implemented in the metadata spec as a str, + # but it conceptually is a list of strings, and is serialized using + # ", ".join(keywords), so we'll do some light data massaging to turn + # this into what it logically is.
+ elif raw_name == "keywords" and len(value) == 1: + raw[raw_name] = _parse_keywords(value[0]) + # Special Case: Project-URL + # The project urls is implemented in the metadata spec as a list of + # specially-formatted strings that represent a key and a value, which + # is fundamentally a mapping, however the email format doesn't support + # mappings in a sane way, so it was crammed into a list of strings + # instead. + # + # We will do a little light data massaging to turn this into a map as + # it logically should be. + elif raw_name == "project_urls": + try: + raw[raw_name] = _parse_project_urls(value) + except KeyError: + unparsed[name] = value + # Nothing that we've done has managed to parse this, so it'll just + # throw it in our unparseable data and move on. + else: + unparsed[name] = value + + # We need to support getting the Description from the message payload in + # addition to getting it from the the headers. This does mean, though, there + # is the possibility of it being set both ways, in which case we put both + # in 'unparsed' since we don't know which is right. + try: + payload = _get_payload(parsed, data) + except ValueError: + unparsed.setdefault("description", []).append( + parsed.get_payload(decode=isinstance(data, bytes)) + ) + else: + if payload: + # Check to see if we've already got a description, if so then both + # it, and this body move to unparseable. + if "description" in raw: + description_header = cast(str, raw.pop("description")) + unparsed.setdefault("description", []).extend( + [description_header, payload] + ) + elif "description" in unparsed: + unparsed["description"].append(payload) + else: + raw["description"] = payload + + # We need to cast our `raw` to a metadata, because a TypedDict only support + # literal key names, but we're computing our key names on purpose, but the + # way this function is implemented, our `TypedDict` can only have valid key + # names. + return cast(RawMetadata, raw), unparsed + + +_NOT_FOUND = object() + + +# Keep the two values in sync. +_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] +_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] + +_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"]) + + +class _Validator(Generic[T]): + """Validate a metadata field. + + All _process_*() methods correspond to a core metadata field. The method is + called with the field's raw value. If the raw value is valid it is returned + in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field). + If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause + as appropriate). + """ + + name: str + raw_name: str + added: _MetadataVersion + + def __init__( + self, + *, + added: _MetadataVersion = "1.0", + ) -> None: + self.added = added + + def __set_name__(self, _owner: "Metadata", name: str) -> None: + self.name = name + self.raw_name = _RAW_TO_EMAIL_MAPPING[name] + + def __get__(self, instance: "Metadata", _owner: Type["Metadata"]) -> T: + # With Python 3.8, the caching can be replaced with functools.cached_property(). + # No need to check the cache as attribute lookup will resolve into the + # instance's __dict__ before __get__ is called. + cache = instance.__dict__ + value = instance._raw.get(self.name) + + # To make the _process_* methods easier, we'll check if the value is None + # and if this field is NOT a required attribute, and if both of those + # things are true, we'll skip the the converter. 
This will mean that the + # converters never have to deal with the None union. + if self.name in _REQUIRED_ATTRS or value is not None: + try: + converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}") + except AttributeError: + pass + else: + value = converter(value) + + cache[self.name] = value + try: + del instance._raw[self.name] # type: ignore[misc] + except KeyError: + pass + + return cast(T, value) + + def _invalid_metadata( + self, msg: str, cause: Optional[Exception] = None + ) -> InvalidMetadata: + exc = InvalidMetadata( + self.raw_name, msg.format_map({"field": repr(self.raw_name)}) + ) + exc.__cause__ = cause + return exc + + def _process_metadata_version(self, value: str) -> _MetadataVersion: + # Implicitly makes Metadata-Version required. + if value not in _VALID_METADATA_VERSIONS: + raise self._invalid_metadata(f"{value!r} is not a valid metadata version") + return cast(_MetadataVersion, value) + + def _process_name(self, value: str) -> str: + if not value: + raise self._invalid_metadata("{field} is a required field") + # Validate the name as a side-effect. + try: + utils.canonicalize_name(value, validate=True) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + else: + return value + + def _process_version(self, value: str) -> version_module.Version: + if not value: + raise self._invalid_metadata("{field} is a required field") + try: + return version_module.parse(value) + except version_module.InvalidVersion as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_summary(self, value: str) -> str: + """Check the field contains no newlines.""" + if "\n" in value: + raise self._invalid_metadata("{field} must be a single line") + return value + + def _process_description_content_type(self, value: str) -> str: + content_types = {"text/plain", "text/x-rst", "text/markdown"} + message = email.message.EmailMessage() + message["content-type"] = value + + content_type, parameters = ( + # Defaults to `text/plain` if parsing failed. + message.get_content_type().lower(), + message["content-type"].params, + ) + # Check if content-type is valid or defaulted to `text/plain` and thus was + # not parseable. + if content_type not in content_types or content_type not in value.lower(): + raise self._invalid_metadata( + f"{{field}} must be one of {list(content_types)}, not {value!r}" + ) + + charset = parameters.get("charset", "UTF-8") + if charset != "UTF-8": + raise self._invalid_metadata( + f"{{field}} can only specify the UTF-8 charset, not {list(charset)}" + ) + + markdown_variants = {"GFM", "CommonMark"} + variant = parameters.get("variant", "GFM") # Use an acceptable default. 
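+ # Only text/markdown takes a variant; GFM and CommonMark are the two + # variants named by the core metadata spec.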
+ if content_type == "text/markdown" and variant not in markdown_variants: + raise self._invalid_metadata( + f"valid Markdown variants for {{field}} are {list(markdown_variants)}, " + f"not {variant!r}", + ) + return value + + def _process_dynamic(self, value: List[str]) -> List[str]: + for dynamic_field in map(str.lower, value): + if dynamic_field in {"name", "version", "metadata-version"}: + raise self._invalid_metadata( + f"{value!r} is not allowed as a dynamic field" + ) + elif dynamic_field not in _EMAIL_TO_RAW_MAPPING: + raise self._invalid_metadata(f"{value!r} is not a valid dynamic field") + return list(map(str.lower, value)) + + def _process_provides_extra( + self, + value: List[str], + ) -> List[utils.NormalizedName]: + normalized_names = [] + try: + for name in value: + normalized_names.append(utils.canonicalize_name(name, validate=True)) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{name!r} is invalid for {{field}}", cause=exc + ) + else: + return normalized_names + + def _process_requires_python(self, value: str) -> specifiers.SpecifierSet: + try: + return specifiers.SpecifierSet(value) + except specifiers.InvalidSpecifier as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_requires_dist( + self, + value: List[str], + ) -> List[requirements.Requirement]: + reqs = [] + try: + for req in value: + reqs.append(requirements.Requirement(req)) + except requirements.InvalidRequirement as exc: + raise self._invalid_metadata(f"{req!r} is invalid for {{field}}", cause=exc) + else: + return reqs + + +class Metadata: + """Representation of distribution metadata. + + Compared to :class:`RawMetadata`, this class provides objects representing + metadata fields instead of only using built-in types. Any invalid metadata + will cause :exc:`InvalidMetadata` to be raised (with a + :py:attr:`~BaseException.__cause__` attribute as appropriate). + """ + + _raw: RawMetadata + + @classmethod + def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> "Metadata": + """Create an instance from :class:`RawMetadata`. + + If *validate* is true, all metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + ins = cls() + ins._raw = data.copy() # Mutations occur due to caching enriched values. + + if validate: + exceptions: List[Exception] = [] + try: + metadata_version = ins.metadata_version + metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version) + except InvalidMetadata as metadata_version_exc: + exceptions.append(metadata_version_exc) + metadata_version = None + + # Make sure to check for the fields that are present, the required + # fields (so their absence can be reported). + fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS + # Remove fields that have already been checked. + fields_to_check -= {"metadata_version"} + + for key in fields_to_check: + try: + if metadata_version: + # Can't use getattr() as that triggers descriptor protocol which + # will fail due to no value for the instance argument. 
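+ # e.g. cls.__dict__["requires_dist"].added == "1.2", so Requires-Dist + # in a metadata 1.0 file is reported as invalid below.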
+ try: + field_metadata_version = cls.__dict__[key].added + except KeyError: + exc = InvalidMetadata(key, f"unrecognized field: {key!r}") + exceptions.append(exc) + continue + field_age = _VALID_METADATA_VERSIONS.index( + field_metadata_version + ) + if field_age > metadata_age: + field = _RAW_TO_EMAIL_MAPPING[key] + exc = InvalidMetadata( + field, + "{field} introduced in metadata version " + "{field_metadata_version}, not {metadata_version}", + ) + exceptions.append(exc) + continue + getattr(ins, key) + except InvalidMetadata as exc: + exceptions.append(exc) + + if exceptions: + raise ExceptionGroup("invalid metadata", exceptions) + + return ins + + @classmethod + def from_email( + cls, data: Union[bytes, str], *, validate: bool = True + ) -> "Metadata": + """Parse metadata from email headers. + + If *validate* is true, the metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + raw, unparsed = parse_email(data) + + if validate: + exceptions: list[Exception] = [] + for unparsed_key in unparsed: + if unparsed_key in _EMAIL_TO_RAW_MAPPING: + message = f"{unparsed_key!r} has invalid data" + else: + message = f"unrecognized field: {unparsed_key!r}" + exceptions.append(InvalidMetadata(unparsed_key, message)) + + if exceptions: + raise ExceptionGroup("unparsed", exceptions) + + try: + return cls.from_raw(raw, validate=validate) + except ExceptionGroup as exc_group: + raise ExceptionGroup( + "invalid or unparsed metadata", exc_group.exceptions + ) from None + + metadata_version: _Validator[_MetadataVersion] = _Validator() + """:external:ref:`core-metadata-metadata-version` + (required; validated to be a valid metadata version)""" + name: _Validator[str] = _Validator() + """:external:ref:`core-metadata-name` + (required; validated using :func:`~packaging.utils.canonicalize_name` and its + *validate* parameter)""" + version: _Validator[version_module.Version] = _Validator() + """:external:ref:`core-metadata-version` (required)""" + dynamic: _Validator[Optional[List[str]]] = _Validator( + added="2.2", + ) + """:external:ref:`core-metadata-dynamic` + (validated against core metadata field names and lowercased)""" + platforms: _Validator[Optional[List[str]]] = _Validator() + """:external:ref:`core-metadata-platform`""" + supported_platforms: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """:external:ref:`core-metadata-supported-platform`""" + summary: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-summary` (validated to contain no newlines)""" + description: _Validator[Optional[str]] = _Validator() # TODO 2.1: can be in body + """:external:ref:`core-metadata-description`""" + description_content_type: _Validator[Optional[str]] = _Validator(added="2.1") + """:external:ref:`core-metadata-description-content-type` (validated)""" + keywords: _Validator[Optional[List[str]]] = _Validator() + """:external:ref:`core-metadata-keywords`""" + home_page: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-home-page`""" + download_url: _Validator[Optional[str]] = _Validator(added="1.1") + """:external:ref:`core-metadata-download-url`""" + author: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-author`""" + author_email: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-author-email`""" + maintainer: _Validator[Optional[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer`""" + maintainer_email: 
_Validator[Optional[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer-email`""" + license: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-license`""" + classifiers: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """:external:ref:`core-metadata-classifier`""" + requires_dist: _Validator[Optional[List[requirements.Requirement]]] = _Validator( + added="1.2" + ) + """:external:ref:`core-metadata-requires-dist`""" + requires_python: _Validator[Optional[specifiers.SpecifierSet]] = _Validator( + added="1.2" + ) + """:external:ref:`core-metadata-requires-python`""" + # Because `Requires-External` allows for non-PEP 440 version specifiers, we + # don't do any processing on the values. + requires_external: _Validator[Optional[List[str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-external`""" + project_urls: _Validator[Optional[Dict[str, str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-project-url`""" + # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation + # regardless of metadata version. + provides_extra: _Validator[Optional[List[utils.NormalizedName]]] = _Validator( + added="2.1", + ) + """:external:ref:`core-metadata-provides-extra`""" + provides_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-provides-dist`""" + obsoletes_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-obsoletes-dist`""" + requires: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """``Requires`` (deprecated)""" + provides: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """``Provides`` (deprecated)""" + obsoletes: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """``Obsoletes`` (deprecated)""" diff --git a/src/pip/_vendor/packaging/requirements.py b/src/pip/_vendor/packaging/requirements.py index 1eab7dd66d9..bdc43a7e98d 100644 --- a/src/pip/_vendor/packaging/requirements.py +++ b/src/pip/_vendor/packaging/requirements.py @@ -2,26 +2,13 @@ # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. 
-import re -import string -import urllib.parse -from typing import List, Optional as TOptional, Set - -from pip._vendor.pyparsing import ( # noqa - Combine, - Literal as L, - Optional, - ParseException, - Regex, - Word, - ZeroOrMore, - originalTextFor, - stringEnd, - stringStart, -) - -from .markers import MARKER_EXPR, Marker -from .specifiers import LegacySpecifier, Specifier, SpecifierSet +from typing import Any, Iterator, Optional, Set + +from ._parser import parse_requirement as _parse_requirement +from ._tokenizer import ParserSyntaxError +from .markers import Marker, _normalize_extra_values +from .specifiers import SpecifierSet +from .utils import canonicalize_name class InvalidRequirement(ValueError): @@ -30,60 +17,6 @@ class InvalidRequirement(ValueError): """ -ALPHANUM = Word(string.ascii_letters + string.digits) - -LBRACKET = L("[").suppress() -RBRACKET = L("]").suppress() -LPAREN = L("(").suppress() -RPAREN = L(")").suppress() -COMMA = L(",").suppress() -SEMICOLON = L(";").suppress() -AT = L("@").suppress() - -PUNCTUATION = Word("-_.") -IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM) -IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END)) - -NAME = IDENTIFIER("name") -EXTRA = IDENTIFIER - -URI = Regex(r"[^ ]+")("url") -URL = AT + URI - -EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA) -EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras") - -VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE) -VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE) - -VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY -VERSION_MANY = Combine( - VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), joinString=",", adjacent=False -)("_raw_spec") -_VERSION_SPEC = Optional((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY) -_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "") - -VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier") -VERSION_SPEC.setParseAction(lambda s, l, t: t[1]) - -MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker") -MARKER_EXPR.setParseAction( - lambda s, l, t: Marker(s[t._original_start : t._original_end]) -) -MARKER_SEPARATOR = SEMICOLON -MARKER = MARKER_SEPARATOR + MARKER_EXPR - -VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER) -URL_AND_MARKER = URL + Optional(MARKER) - -NAMED_REQUIREMENT = NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER) - -REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd -# pyparsing isn't thread safe during initialization, so we do it eagerly, see -# issue #104 -REQUIREMENT.parseString("x[]") - - class Requirement: """Parse a requirement. 
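Taken together with the new _parser.py, the public surface of `Requirement` stays the same while error reporting improves. A quick sketch against the rewritten class (error text abbreviated):

    from pip._vendor.packaging.requirements import InvalidRequirement, Requirement

    req = Requirement("requests[security,tests] >= 2.8.1, == 2.8.* ; python_version < '2.7'")
    assert req.name == "requests"
    assert req.extras == {"security", "tests"}
    assert str(req.specifier) == "==2.8.*,>=2.8.1"     # SpecifierSet sorts on str()
    assert str(req.marker) == 'python_version < "2.7"'

    try:
        Requirement("name ==1.0 #trailing-junk")
    except InvalidRequirement as err:
        print(err)  # the ParserSyntaxError text points at the offending span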
@@ -99,48 +32,59 @@ class Requirement: def __init__(self, requirement_string: str) -> None: try: - req = REQUIREMENT.parseString(requirement_string) - except ParseException as e: - raise InvalidRequirement( - f'Parse error at "{ requirement_string[e.loc : e.loc + 8]!r}": {e.msg}' - ) - - self.name: str = req.name - if req.url: - parsed_url = urllib.parse.urlparse(req.url) - if parsed_url.scheme == "file": - if urllib.parse.urlunparse(parsed_url) != req.url: - raise InvalidRequirement("Invalid URL given") - elif not (parsed_url.scheme and parsed_url.netloc) or ( - not parsed_url.scheme and not parsed_url.netloc - ): - raise InvalidRequirement(f"Invalid URL: {req.url}") - self.url: TOptional[str] = req.url - else: - self.url = None - self.extras: Set[str] = set(req.extras.asList() if req.extras else []) - self.specifier: SpecifierSet = SpecifierSet(req.specifier) - self.marker: TOptional[Marker] = req.marker if req.marker else None - - def __str__(self) -> str: - parts: List[str] = [self.name] + parsed = _parse_requirement(requirement_string) + except ParserSyntaxError as e: + raise InvalidRequirement(str(e)) from e + + self.name: str = parsed.name + self.url: Optional[str] = parsed.url or None + self.extras: Set[str] = set(parsed.extras or []) + self.specifier: SpecifierSet = SpecifierSet(parsed.specifier) + self.marker: Optional[Marker] = None + if parsed.marker is not None: + self.marker = Marker.__new__(Marker) + self.marker._markers = _normalize_extra_values(parsed.marker) + + def _iter_parts(self, name: str) -> Iterator[str]: + yield name if self.extras: formatted_extras = ",".join(sorted(self.extras)) - parts.append(f"[{formatted_extras}]") + yield f"[{formatted_extras}]" if self.specifier: - parts.append(str(self.specifier)) + yield str(self.specifier) if self.url: - parts.append(f"@ {self.url}") + yield f"@ {self.url}" if self.marker: - parts.append(" ") + yield " " if self.marker: - parts.append(f"; {self.marker}") + yield f"; {self.marker}" - return "".join(parts) + def __str__(self) -> str: + return "".join(self._iter_parts(self.name)) def __repr__(self) -> str: return f"<Requirement('{self}')>" + + def __hash__(self) -> int: + return hash( + ( + self.__class__.__name__, + *self._iter_parts(canonicalize_name(self.name)), + ) + ) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Requirement): + return NotImplemented + + return ( + canonicalize_name(self.name) == canonicalize_name(other.name) + and self.extras == other.extras + and self.specifier == other.specifier + and self.url == other.url + and self.marker == other.marker + ) diff --git a/src/pip/_vendor/packaging/specifiers.py b/src/pip/_vendor/packaging/specifiers.py index 0e218a6f9f7..3a6497e44e6 100644 --- a/src/pip/_vendor/packaging/specifiers.py +++ b/src/pip/_vendor/packaging/specifiers.py @@ -1,38 +1,41 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. +""" +..
testsetup:: + + from pip._vendor.packaging.specifiers import Specifier, SpecifierSet, InvalidSpecifier + from pip._vendor.packaging.version import Version +""" import abc -import functools import itertools import re -import warnings -from typing import ( - Callable, - Dict, - Iterable, - Iterator, - List, - Optional, - Pattern, - Set, - Tuple, - TypeVar, - Union, -) +from typing import Callable, Iterable, Iterator, List, Optional, Tuple, TypeVar, Union from .utils import canonicalize_version -from .version import LegacyVersion, Version, parse +from .version import Version + +UnparsedVersion = Union[Version, str] +UnparsedVersionVar = TypeVar("UnparsedVersionVar", bound=UnparsedVersion) +CallableOperator = Callable[[Version, str], bool] -ParsedVersion = Union[Version, LegacyVersion] -UnparsedVersion = Union[Version, LegacyVersion, str] -VersionTypeVar = TypeVar("VersionTypeVar", bound=UnparsedVersion) -CallableOperator = Callable[[ParsedVersion, str], bool] + +def _coerce_version(version: UnparsedVersion) -> Version: + if not isinstance(version, Version): + version = Version(version) + return version class InvalidSpecifier(ValueError): """ - An invalid specifier was found, users should refer to PEP 440. + Raised when attempting to create a :class:`Specifier` with a specifier + string that is invalid. + + >>> Specifier("lolwat") + Traceback (most recent call last): + ... + packaging.specifiers.InvalidSpecifier: Invalid specifier: 'lolwat' """ @@ -40,35 +43,39 @@ class BaseSpecifier(metaclass=abc.ABCMeta): @abc.abstractmethod def __str__(self) -> str: """ - Returns the str representation of this Specifier like object. This + Returns the str representation of this Specifier-like object. This should be representative of the Specifier itself. """ @abc.abstractmethod def __hash__(self) -> int: """ - Returns a hash value for this Specifier like object. + Returns a hash value for this Specifier-like object. """ @abc.abstractmethod def __eq__(self, other: object) -> bool: """ - Returns a boolean representing whether or not the two Specifier like + Returns a boolean representing whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. """ - @abc.abstractproperty + @property + @abc.abstractmethod def prereleases(self) -> Optional[bool]: - """ - Returns whether or not pre-releases as a whole are allowed by this - specifier. + """Whether or not pre-releases as a whole are allowed. + + This can be set to either ``True`` or ``False`` to explicitly enable or disable + prereleases or it can be set to ``None`` (the default) to use default semantics. """ @prereleases.setter def prereleases(self, value: bool) -> None: - """ - Sets whether or not pre-releases as a whole are allowed by this - specifier. + """Setter for :attr:`prereleases`. + + :param value: The value to set. """ @abc.abstractmethod @@ -79,227 +86,28 @@ def contains(self, item: str, prereleases: Optional[bool] = None) -> bool: @abc.abstractmethod def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: """ Takes an iterable of items and filters them so that only items which are contained within this specifier are allowed in it. 
""" -class _IndividualSpecifier(BaseSpecifier): - - _operators: Dict[str, str] = {} - _regex: Pattern[str] - - def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: - match = self._regex.search(spec) - if not match: - raise InvalidSpecifier(f"Invalid specifier: '{spec}'") - - self._spec: Tuple[str, str] = ( - match.group("operator").strip(), - match.group("version").strip(), - ) - - # Store whether or not this Specifier should accept prereleases - self._prereleases = prereleases - - def __repr__(self) -> str: - pre = ( - f", prereleases={self.prereleases!r}" - if self._prereleases is not None - else "" - ) - - return f"<{self.__class__.__name__}({str(self)!r}{pre})>" - - def __str__(self) -> str: - return "{}{}".format(*self._spec) - - @property - def _canonical_spec(self) -> Tuple[str, str]: - return self._spec[0], canonicalize_version(self._spec[1]) - - def __hash__(self) -> int: - return hash(self._canonical_spec) - - def __eq__(self, other: object) -> bool: - if isinstance(other, str): - try: - other = self.__class__(str(other)) - except InvalidSpecifier: - return NotImplemented - elif not isinstance(other, self.__class__): - return NotImplemented - - return self._canonical_spec == other._canonical_spec - - def _get_operator(self, op: str) -> CallableOperator: - operator_callable: CallableOperator = getattr( - self, f"_compare_{self._operators[op]}" - ) - return operator_callable - - def _coerce_version(self, version: UnparsedVersion) -> ParsedVersion: - if not isinstance(version, (LegacyVersion, Version)): - version = parse(version) - return version - - @property - def operator(self) -> str: - return self._spec[0] - - @property - def version(self) -> str: - return self._spec[1] - - @property - def prereleases(self) -> Optional[bool]: - return self._prereleases - - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value - - def __contains__(self, item: str) -> bool: - return self.contains(item) - - def contains( - self, item: UnparsedVersion, prereleases: Optional[bool] = None - ) -> bool: - - # Determine if prereleases are to be allowed or not. - if prereleases is None: - prereleases = self.prereleases - - # Normalize item to a Version or LegacyVersion, this allows us to have - # a shortcut for ``"2.0" in Specifier(">=2") - normalized_item = self._coerce_version(item) - - # Determine if we should be supporting prereleases in this specifier - # or not, if we do not support prereleases than we can short circuit - # logic if this version is a prereleases. - if normalized_item.is_prerelease and not prereleases: - return False - - # Actually do the comparison to determine if this item is contained - # within this Specifier or not. - operator_callable: CallableOperator = self._get_operator(self.operator) - return operator_callable(normalized_item, self.version) - - def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: - - yielded = False - found_prereleases = [] - - kw = {"prereleases": prereleases if prereleases is not None else True} - - # Attempt to iterate over all the values in the iterable and if any of - # them match, yield them. - for version in iterable: - parsed_version = self._coerce_version(version) - - if self.contains(parsed_version, **kw): - # If our version is a prerelease, and we were not set to allow - # prereleases, then we'll store it for later in case nothing - # else matches this specifier. 
- if parsed_version.is_prerelease and not ( - prereleases or self.prereleases - ): - found_prereleases.append(version) - # Either this is not a prerelease, or we should have been - # accepting prereleases from the beginning. - else: - yielded = True - yield version - - # Now that we've iterated over everything, determine if we've yielded - # any values, and if we have not and we have any prereleases stored up - # then we will go ahead and yield the prereleases. - if not yielded and found_prereleases: - for version in found_prereleases: - yield version - - -class LegacySpecifier(_IndividualSpecifier): - - _regex_str = r""" - (?P(==|!=|<=|>=|<|>)) - \s* - (?P - [^,;\s)]* # Since this is a "legacy" specifier, and the version - # string can be just about anything, we match everything - # except for whitespace, a semi-colon for marker support, - # a closing paren since versions can be enclosed in - # them, and a comma since it's a version separator. - ) - """ - - _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) - - _operators = { - "==": "equal", - "!=": "not_equal", - "<=": "less_than_equal", - ">=": "greater_than_equal", - "<": "less_than", - ">": "greater_than", - } - - def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: - super().__init__(spec, prereleases) - - warnings.warn( - "Creating a LegacyVersion has been deprecated and will be " - "removed in the next major release", - DeprecationWarning, - ) - - def _coerce_version(self, version: UnparsedVersion) -> LegacyVersion: - if not isinstance(version, LegacyVersion): - version = LegacyVersion(str(version)) - return version - - def _compare_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective == self._coerce_version(spec) - - def _compare_not_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective != self._coerce_version(spec) - - def _compare_less_than_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective <= self._coerce_version(spec) - - def _compare_greater_than_equal( - self, prospective: LegacyVersion, spec: str - ) -> bool: - return prospective >= self._coerce_version(spec) - - def _compare_less_than(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective < self._coerce_version(spec) - - def _compare_greater_than(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective > self._coerce_version(spec) - - -def _require_version_compare( - fn: Callable[["Specifier", ParsedVersion, str], bool] -) -> Callable[["Specifier", ParsedVersion, str], bool]: - @functools.wraps(fn) - def wrapped(self: "Specifier", prospective: ParsedVersion, spec: str) -> bool: - if not isinstance(prospective, Version): - return False - return fn(self, prospective, spec) - - return wrapped +class Specifier(BaseSpecifier): + """This class abstracts handling of version specifiers. + .. tip:: -class Specifier(_IndividualSpecifier): + It is generally not required to instantiate this manually. You should instead + prefer to work with :class:`SpecifierSet` instead, which can parse + comma-separated version specifiers (which is what package metadata contains). + """ - _regex_str = r""" + _operator_regex_str = r""" (?P(~=|==|!=|<=|>=|<|>|===)) + """ + _version_regex_str = r""" (?P (?: # The identity operators allow for an escape hatch that will @@ -309,8 +117,10 @@ class Specifier(_IndividualSpecifier): # but included entirely as an escape hatch. 
(?<====) # Only match for the identity operator \s* - [^\s]* # We just match everything, except for whitespace - # since we are only testing for strict identity. + [^\s;)]* # The arbitrary version can be just about anything, + # we match everything except for whitespace, a + # semi-colon for marker support, and a closing paren + # since versions can be enclosed in them. ) | (?: @@ -323,23 +133,23 @@ class Specifier(_IndividualSpecifier): v? (?:[0-9]+!)? # epoch [0-9]+(?:\.[0-9]+)* # release - (?: # pre release - [-_\.]? - (a|b|c|rc|alpha|beta|pre|preview) - [-_\.]? - [0-9]* - )? - (?: # post release - (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) - )? - # You cannot use a wild card and a dev or local version - # together so group them with a | and make them optional. + # You cannot use a wild card and a pre-release, post-release, a dev or + # local version together so group them with a | and make them optional. (?: + \.\* # Wild card syntax of .* + | + (?: # pre release + [-_\.]? + (alpha|beta|preview|pre|a|b|c|rc) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local - | - \.\* # Wild card syntax of .* )? ) | @@ -354,7 +164,7 @@ class Specifier(_IndividualSpecifier): [0-9]+(?:\.[0-9]+)+ # release (We have a + instead of a *) (?: # pre release [-_\.]? - (a|b|c|rc|alpha|beta|pre|preview) + (alpha|beta|preview|pre|a|b|c|rc) [-_\.]? [0-9]* )? @@ -379,7 +189,7 @@ class Specifier(_IndividualSpecifier): [0-9]+(?:\.[0-9]+)* # release (?: # pre release [-_\.]? - (a|b|c|rc|alpha|beta|pre|preview) + (alpha|beta|preview|pre|a|b|c|rc) [-_\.]? [0-9]* )? @@ -391,7 +201,10 @@ class Specifier(_IndividualSpecifier): ) """ - _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) + _regex = re.compile( + r"^\s*" + _operator_regex_str + _version_regex_str + r"\s*$", + re.VERBOSE | re.IGNORECASE, + ) _operators = { "~=": "compatible", @@ -404,8 +217,153 @@ class Specifier(_IndividualSpecifier): "===": "arbitrary", } - @_require_version_compare - def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + """Initialize a Specifier instance. + + :param spec: + The string representation of a specifier which will be parsed and + normalized before use. + :param prereleases: + This tells the specifier if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + :raises InvalidSpecifier: + If the given specifier is invalid (i.e. bad syntax). + """ + match = self._regex.search(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + + self._spec: Tuple[str, str] = ( + match.group("operator").strip(), + match.group("version").strip(), + ) + + # Store whether or not this Specifier should accept prereleases + self._prereleases = prereleases + + # https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515 + @property # type: ignore[override] + def prereleases(self) -> bool: + # If there is an explicit prereleases set for this, then we'll just + # blindly use that. + if self._prereleases is not None: + return self._prereleases + + # Look at all of our specifiers and determine if they are inclusive + # operators, and if they are if they are including an explicit + # prerelease. 
+ operator, version = self._spec + if operator in ["==", ">=", "<=", "~=", "==="]: + # The == specifier can include a trailing .*, if it does we + # want to remove before parsing. + if operator == "==" and version.endswith(".*"): + version = version[:-2] + + # Parse the version, and if it is a pre-release than this + # specifier allows pre-releases. + if Version(version).is_prerelease: + return True + + return False + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + @property + def operator(self) -> str: + """The operator of this specifier. + + >>> Specifier("==1.2.3").operator + '==' + """ + return self._spec[0] + + @property + def version(self) -> str: + """The version of this specifier. + + >>> Specifier("==1.2.3").version + '1.2.3' + """ + return self._spec[1] + + def __repr__(self) -> str: + """A representation of the Specifier that shows all internal state. + + >>> Specifier('>=1.0.0') + =1.0.0')> + >>> Specifier('>=1.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> Specifier('>=1.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"<{self.__class__.__name__}({str(self)!r}{pre})>" + + def __str__(self) -> str: + """A string representation of the Specifier that can be round-tripped. + + >>> str(Specifier('>=1.0.0')) + '>=1.0.0' + >>> str(Specifier('>=1.0.0', prereleases=False)) + '>=1.0.0' + """ + return "{}{}".format(*self._spec) + + @property + def _canonical_spec(self) -> Tuple[str, str]: + canonical_version = canonicalize_version( + self._spec[1], + strip_trailing_zero=(self._spec[0] != "~="), + ) + return self._spec[0], canonical_version + + def __hash__(self) -> int: + return hash(self._canonical_spec) + + def __eq__(self, other: object) -> bool: + """Whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> Specifier("==1.2.3") == Specifier("== 1.2.3.0") + True + >>> (Specifier("==1.2.3", prereleases=False) == + ... Specifier("==1.2.3", prereleases=True)) + True + >>> Specifier("==1.2.3") == "==1.2.3" + True + >>> Specifier("==1.2.3") == Specifier("==1.2.4") + False + >>> Specifier("==1.2.3") == Specifier("~=1.2.3") + False + """ + if isinstance(other, str): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._canonical_spec == other._canonical_spec + + def _get_operator(self, op: str) -> CallableOperator: + operator_callable: CallableOperator = getattr( + self, f"_compare_{self._operators[op]}" + ) + return operator_callable + + def _compare_compatible(self, prospective: Version, spec: str) -> bool: # Compatible releases have an equivalent combination of >= and ==. That # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to @@ -415,7 +373,7 @@ def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: # We want everything but the last item in the version, but we want to # ignore suffix segments. 
- prefix = ".".join( + prefix = _version_join( list(itertools.takewhile(_is_not_suffix, _version_split(spec)))[:-1] ) @@ -426,34 +384,35 @@ def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: prospective, prefix ) - @_require_version_compare - def _compare_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_equal(self, prospective: Version, spec: str) -> bool: # We need special logic to handle prefix matching if spec.endswith(".*"): # In the case of prefix matching we want to ignore local segment. - prospective = Version(prospective.public) - # Split the spec out by dots, and pretend that there is an implicit - # dot in between a release segment and a pre-release segment. - split_spec = _version_split(spec[:-2]) # Remove the trailing .* + normalized_prospective = canonicalize_version( + prospective.public, strip_trailing_zero=False + ) + # Get the normalized version string ignoring the trailing .* + normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False) + # Split the spec out by bangs and dots, and pretend that there is + # an implicit dot in between a release segment and a pre-release segment. + split_spec = _version_split(normalized_spec) - # Split the prospective version out by dots, and pretend that there - # is an implicit dot in between a release segment and a pre-release - # segment. - split_prospective = _version_split(str(prospective)) + # Split the prospective version out by bangs and dots, and pretend + # that there is an implicit dot in between a release segment and + # a pre-release segment. + split_prospective = _version_split(normalized_prospective) + + # 0-pad the prospective version before shortening it to get the correct + # shortened version. + padded_prospective, _ = _pad_version(split_prospective, split_spec) # Shorten the prospective version to be the same length as the spec # so that we can determine if the specifier is a prefix of the # prospective version or not. - shortened_prospective = split_prospective[: len(split_spec)] - - # Pad out our two sides with zeros so that they both equal the same - # length. - padded_spec, padded_prospective = _pad_version( - split_spec, shortened_prospective - ) + shortened_prospective = padded_prospective[: len(split_spec)] - return padded_prospective == padded_spec + return shortened_prospective == split_spec else: # Convert our spec string into a Version spec_version = Version(spec) @@ -466,30 +425,24 @@ def _compare_equal(self, prospective: ParsedVersion, spec: str) -> bool: return prospective == spec_version - @_require_version_compare - def _compare_not_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_not_equal(self, prospective: Version, spec: str) -> bool: return not self._compare_equal(prospective, spec) - @_require_version_compare - def _compare_less_than_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_less_than_equal(self, prospective: Version, spec: str) -> bool: # NB: Local version identifiers are NOT permitted in the version # specifier, so local version labels can be universally removed from # the prospective version. 
return Version(prospective.public) <= Version(spec) - @_require_version_compare - def _compare_greater_than_equal( - self, prospective: ParsedVersion, spec: str - ) -> bool: + def _compare_greater_than_equal(self, prospective: Version, spec: str) -> bool: # NB: Local version identifiers are NOT permitted in the version # specifier, so local version labels can be universally removed from # the prospective version. return Version(prospective.public) >= Version(spec) - @_require_version_compare - def _compare_less_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + def _compare_less_than(self, prospective: Version, spec_str: str) -> bool: # Convert our spec to a Version instance, since we'll want to work with # it as a version. @@ -514,8 +467,7 @@ def _compare_less_than(self, prospective: ParsedVersion, spec_str: str) -> bool: # version in the spec. return True - @_require_version_compare - def _compare_greater_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + def _compare_greater_than(self, prospective: Version, spec_str: str) -> bool: # Convert our spec to a Version instance, since we'll want to work with # it as a version. @@ -549,42 +501,152 @@ def _compare_greater_than(self, prospective: ParsedVersion, spec_str: str) -> bo def _compare_arbitrary(self, prospective: Version, spec: str) -> bool: return str(prospective).lower() == str(spec).lower() - @property - def prereleases(self) -> bool: + def __contains__(self, item: Union[str, Version]) -> bool: + """Return whether or not the item is contained in this specifier. - # If there is an explicit prereleases set for this, then we'll just - # blindly use that. - if self._prereleases is not None: - return self._prereleases + :param item: The item to check for. - # Look at all of our specifiers and determine if they are inclusive - # operators, and if they are if they are including an explicit - # prerelease. - operator, version = self._spec - if operator in ["==", ">=", "<=", "~=", "==="]: - # The == specifier can include a trailing .*, if it does we - # want to remove before parsing. - if operator == "==" and version.endswith(".*"): - version = version[:-2] + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. - # Parse the version, and if it is a pre-release than this - # specifier allows pre-releases. - if parse(version).is_prerelease: - return True + >>> "1.2.3" in Specifier(">=1.2.3") + True + >>> Version("1.2.3") in Specifier(">=1.2.3") + True + >>> "1.0.0" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3", prereleases=True) + True + """ + return self.contains(item) - return False + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this Specifier. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. 
+ + >>> Specifier(">=1.2.3").contains("1.2.3") + True + >>> Specifier(">=1.2.3").contains(Version("1.2.3")) + True + >>> Specifier(">=1.2.3").contains("1.0.0") + False + >>> Specifier(">=1.2.3").contains("1.3.0a1") + False + >>> Specifier(">=1.2.3", prereleases=True).contains("1.3.0a1") + True + >>> Specifier(">=1.2.3").contains("1.3.0a1", prereleases=True) + True + """ - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value + # Determine if prereleases are to be allowed or not. + if prereleases is None: + prereleases = self.prereleases + + # Normalize item to a Version, this allows us to have a shortcut for + # "2.0" in Specifier(">=2") + normalized_item = _coerce_version(item) + + # Determine if we should be supporting prereleases in this specifier + # or not, if we do not support prereleases than we can short circuit + # logic if this version is a prereleases. + if normalized_item.is_prerelease and not prereleases: + return False + + # Actually do the comparison to determine if this item is contained + # within this Specifier or not. + operator_callable: CallableOperator = self._get_operator(self.operator) + return operator_callable(normalized_item, self.version) + + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifier. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(Specifier().contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.2.3", "1.3", Version("1.4")])) + ['1.2.3', '1.3', ] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.5a1"])) + ['1.5a1'] + >>> list(Specifier(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(Specifier(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + """ + + yielded = False + found_prereleases = [] + + kw = {"prereleases": prereleases if prereleases is not None else True} + + # Attempt to iterate over all the values in the iterable and if any of + # them match, yield them. + for version in iterable: + parsed_version = _coerce_version(version) + + if self.contains(parsed_version, **kw): + # If our version is a prerelease, and we were not set to allow + # prereleases, then we'll store it for later in case nothing + # else matches this specifier. + if parsed_version.is_prerelease and not ( + prereleases or self.prereleases + ): + found_prereleases.append(version) + # Either this is not a prerelease, or we should have been + # accepting prereleases from the beginning. + else: + yielded = True + yield version + + # Now that we've iterated over everything, determine if we've yielded + # any values, and if we have not and we have any prereleases stored up + # then we will go ahead and yield the prereleases. 
+ if not yielded and found_prereleases: + for version in found_prereleases: + yield version _prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$") def _version_split(version: str) -> List[str]: + """Split version into components. + + The split components are intended for version comparison. The logic does + not attempt to retain the original version string, so joining the + components back with :func:`_version_join` may not produce the original + version string. + """ result: List[str] = [] - for item in version.split("."): + + epoch, _, rest = version.rpartition("!") + result.append(epoch or "0") + + for item in rest.split("."): match = _prefix_regex.search(item) if match: result.extend(match.groups()) @@ -593,6 +655,17 @@ def _version_split(version: str) -> List[str]: return result +def _version_join(components: List[str]) -> str: + """Join split version components into a version string. + + This function assumes the input came from :func:`_version_split`, where the + first component must be the epoch (either empty or numeric), and all other + components numeric. + """ + epoch, *rest = components + return f"{epoch}!{'.'.join(rest)}" + + def _is_not_suffix(segment: str) -> bool: return not any( segment.startswith(prefix) for prefix in ("dev", "a", "b", "rc", "post") @@ -614,35 +687,82 @@ def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str left_split.insert(1, ["0"] * max(0, len(right_split[0]) - len(left_split[0]))) right_split.insert(1, ["0"] * max(0, len(left_split[0]) - len(right_split[0]))) - return (list(itertools.chain(*left_split)), list(itertools.chain(*right_split))) + return ( + list(itertools.chain.from_iterable(left_split)), + list(itertools.chain.from_iterable(right_split)), + ) class SpecifierSet(BaseSpecifier): + """This class abstracts handling of a set of version specifiers. + + It can be passed a single specifier (``>=3.0``), a comma-separated list of + specifiers (``>=3.0,!=3.1``), or no specifier at all. + """ + def __init__( self, specifiers: str = "", prereleases: Optional[bool] = None ) -> None: + """Initialize a SpecifierSet instance. + + :param specifiers: + The string representation of a specifier or a comma-separated list of + specifiers which will be parsed and normalized before use. + :param prereleases: + This tells the SpecifierSet if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + + :raises InvalidSpecifier: + If the given ``specifiers`` are not parseable than this exception will be + raised. + """ - # Split on , to break each individual specifier into it's own item, and + # Split on `,` to break each individual specifier into it's own item, and # strip each item to remove leading/trailing whitespace. split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] - # Parsed each individual specifier, attempting first to make it a - # Specifier and falling back to a LegacySpecifier. - parsed: Set[_IndividualSpecifier] = set() - for specifier in split_specifiers: - try: - parsed.add(Specifier(specifier)) - except InvalidSpecifier: - parsed.add(LegacySpecifier(specifier)) - - # Turn our parsed specifiers into a frozen set and save them for later. - self._specs = frozenset(parsed) + # Make each individual specifier a Specifier and save in a frozen set for later. + self._specs = frozenset(map(Specifier, split_specifiers)) # Store our prereleases value so we can use it later to determine if # we accept prereleases or not. 
self._prereleases = prereleases + @property + def prereleases(self) -> Optional[bool]: + # If we have been given an explicit prerelease modifier, then we'll + # pass that through here. + if self._prereleases is not None: + return self._prereleases + + # If we don't have any specifiers, and we don't have a forced value, + # then we'll just return None since we don't know if this should have + # pre-releases or not. + if not self._specs: + return None + + # Otherwise we'll see if any of the given specifiers accept + # prereleases, if any of them do we'll return True, otherwise False. + return any(s.prereleases for s in self._specs) + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + def __repr__(self) -> str: + """A representation of the specifier set that shows all internal state. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> SpecifierSet('>=1.0.0,!=2.0.0') + =1.0.0')> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ pre = ( f", prereleases={self.prereleases!r}" if self._prereleases is not None @@ -652,12 +772,31 @@ def __repr__(self) -> str: return f"" def __str__(self) -> str: + """A string representation of the specifier set that can be round-tripped. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> str(SpecifierSet(">=1.0.0,!=1.0.1")) + '!=1.0.1,>=1.0.0' + >>> str(SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False)) + '!=1.0.1,>=1.0.0' + """ return ",".join(sorted(str(s) for s in self._specs)) def __hash__(self) -> int: return hash(self._specs) def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": + """Return a SpecifierSet which is a combination of the two sets. + + :param other: The other object to combine with. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") & '<=2.0.0,!=2.0.1' + =1.0.0')> + >>> SpecifierSet(">=1.0.0,!=1.0.1") & SpecifierSet('<=2.0.0,!=2.0.1') + =1.0.0')> + """ if isinstance(other, str): other = SpecifierSet(other) elif not isinstance(other, SpecifierSet): @@ -681,7 +820,25 @@ def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": return specifier def __eq__(self, other: object) -> bool: - if isinstance(other, (str, _IndividualSpecifier)): + """Whether or not the two SpecifierSet-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> (SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False) == + ... 
SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True)) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == ">=1.0.0,!=1.0.1" + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.2") + False + """ + if isinstance(other, (str, Specifier)): other = SpecifierSet(str(other)) elif not isinstance(other, SpecifierSet): return NotImplemented @@ -689,43 +846,72 @@ def __eq__(self, other: object) -> bool: return self._specs == other._specs def __len__(self) -> int: + """Returns the number of specifiers in this specifier set.""" return len(self._specs) - def __iter__(self) -> Iterator[_IndividualSpecifier]: - return iter(self._specs) - - @property - def prereleases(self) -> Optional[bool]: - - # If we have been given an explicit prerelease modifier, then we'll - # pass that through here. - if self._prereleases is not None: - return self._prereleases - - # If we don't have any specifiers, and we don't have a forced value, - # then we'll just return None since we don't know if this should have - # pre-releases or not. - if not self._specs: - return None - - # Otherwise we'll see if any of the given specifiers accept - # prereleases, if any of them do we'll return True, otherwise False. - return any(s.prereleases for s in self._specs) + def __iter__(self) -> Iterator[Specifier]: + """ + Returns an iterator over all the underlying :class:`Specifier` instances + in this specifier set. - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value + >>> sorted(SpecifierSet(">=1.0.0,!=1.0.1"), key=str) + [, =1.0.0')>] + """ + return iter(self._specs) def __contains__(self, item: UnparsedVersion) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: The item to check for. + + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. + + >>> "1.2.3" in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> Version("1.2.3") in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> "1.0.1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True) + True + """ return self.contains(item) def contains( - self, item: UnparsedVersion, prereleases: Optional[bool] = None + self, + item: UnparsedVersion, + prereleases: Optional[bool] = None, + installed: Optional[bool] = None, ) -> bool: - - # Ensure that our item is a Version or LegacyVersion instance. - if not isinstance(item, (LegacyVersion, Version)): - item = parse(item) + """Return whether or not the item is contained in this SpecifierSet. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this SpecifierSet. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.2.3") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains(Version("1.2.3")) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.0.1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True).contains("1.3.0a1") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1", prereleases=True) + True + """ + # Ensure that our item is a Version instance. 
+ if not isinstance(item, Version): + item = Version(item) # Determine if we're forcing a prerelease or not, if we're not forcing # one for this particular filter call, then we'll use whatever the @@ -742,6 +928,9 @@ def contains( if not prereleases and item.is_prerelease: return False + if installed and item.is_prerelease: + item = Version(item.base_version) + # We simply dispatch to the underlying specs here to make sure that the # given version is contained within all of them. # Note: This use of all() here means that an empty set of specifiers @@ -749,9 +938,46 @@ def contains( return all(s.contains(item, prereleases=prereleases) for s in self._specs) def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: - + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifiers in this set. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(SpecifierSet(...).contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", Version("1.4")])) + ['1.3', ] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.5a1"])) + [] + >>> list(SpecifierSet(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(SpecifierSet(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + + An "empty" SpecifierSet will filter items based on the presence of prerelease + versions in the set. + + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet("").filter(["1.5a1"])) + ['1.5a1'] + >>> list(SpecifierSet("", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + """ # Determine if we're forcing a prerelease or not, if we're not forcing # one for this particular filter call, then we'll use whatever the # SpecifierSet thinks for whether or not we should support prereleases. @@ -764,27 +990,16 @@ def filter( if self._specs: for spec in self._specs: iterable = spec.filter(iterable, prereleases=bool(prereleases)) - return iterable + return iter(iterable) # If we do not have any specifiers, then we need to have a rough filter # which will filter out any pre-releases, unless there are no final - # releases, and which will filter out LegacyVersion in general. + # releases. else: - filtered: List[VersionTypeVar] = [] - found_prereleases: List[VersionTypeVar] = [] - - item: UnparsedVersion - parsed_version: Union[Version, LegacyVersion] + filtered: List[UnparsedVersionVar] = [] + found_prereleases: List[UnparsedVersionVar] = [] for item in iterable: - # Ensure that we some kind of Version class for this item. 
- if not isinstance(item, (LegacyVersion, Version)): - parsed_version = parse(item) - else: - parsed_version = item - - # Filter out any item which is parsed as a LegacyVersion - if isinstance(parsed_version, LegacyVersion): - continue + parsed_version = _coerce_version(item) # Store any item which is a pre-release for later unless we've # already found a final version or we are accepting prereleases @@ -797,6 +1012,6 @@ def filter( # If we've found no items except for pre-releases, then we'll go # ahead and use the pre-releases if not filtered and found_prereleases and prereleases is None: - return found_prereleases + return iter(found_prereleases) - return filtered + return iter(filtered) diff --git a/src/pip/_vendor/packaging/tags.py b/src/pip/_vendor/packaging/tags.py index 9a3d25a71c7..89f1926137d 100644 --- a/src/pip/_vendor/packaging/tags.py +++ b/src/pip/_vendor/packaging/tags.py @@ -4,6 +4,9 @@ import logging import platform +import re +import struct +import subprocess import sys import sysconfig from importlib.machinery import EXTENSION_SUFFIXES @@ -36,7 +39,7 @@ } -_32_BIT_INTERPRETER = sys.maxsize <= 2 ** 32 +_32_BIT_INTERPRETER = struct.calcsize("P") == 4 class Tag: @@ -110,7 +113,7 @@ def parse_tag(tag: str) -> FrozenSet[Tag]: def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: - value = sysconfig.get_config_var(name) + value: Union[int, str, None] = sysconfig.get_config_var(name) if value is None and warn: logger.debug( "Config variable '%s' is unset, Python ABI tag may be incorrect", name @@ -119,23 +122,40 @@ def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: def _normalize_string(string: str) -> str: - return string.replace(".", "_").replace("-", "_") + return string.replace(".", "_").replace("-", "_").replace(" ", "_") -def _abi3_applies(python_version: PythonVersion) -> bool: +def _is_threaded_cpython(abis: List[str]) -> bool: + """ + Determine if the ABI corresponds to a threaded (`--disable-gil`) build. + + The threaded builds are indicated by a "t" in the abiflags. + """ + if len(abis) == 0: + return False + # expect e.g., cp313 + m = re.match(r"cp\d+(.*)", abis[0]) + if not m: + return False + abiflags = m.group(1) + return "t" in abiflags + + +def _abi3_applies(python_version: PythonVersion, threading: bool) -> bool: """ Determine if the Python version supports abi3. - PEP 384 was first implemented in Python 3.2. + PEP 384 was first implemented in Python 3.2. The threaded (`--disable-gil`) + builds do not support abi3. """ - return len(python_version) > 1 and tuple(python_version) >= (3, 2) + return len(python_version) > 1 and tuple(python_version) >= (3, 2) and not threading def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]: py_version = tuple(py_version) # To allow for version comparison. 
abis = [] version = _version_nodot(py_version[:2]) - debug = pymalloc = ucs4 = "" + threading = debug = pymalloc = ucs4 = "" with_debug = _get_config_var("Py_DEBUG", warn) has_refcount = hasattr(sys, "gettotalrefcount") # Windows doesn't set Py_DEBUG, so checking for support of debug-compiled @@ -144,6 +164,8 @@ def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]: has_ext = "_d.pyd" in EXTENSION_SUFFIXES if with_debug or (with_debug is None and (has_refcount or has_ext)): debug = "d" + if py_version >= (3, 13) and _get_config_var("Py_GIL_DISABLED", warn): + threading = "t" if py_version < (3, 8): with_pymalloc = _get_config_var("WITH_PYMALLOC", warn) if with_pymalloc or with_pymalloc is None: @@ -157,13 +179,8 @@ def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]: elif debug: # Debug builds can also load "normal" extension modules. # We can also assume no UCS-4 or pymalloc requirement. - abis.append(f"cp{version}") - abis.insert( - 0, - "cp{version}{debug}{pymalloc}{ucs4}".format( - version=version, debug=debug, pymalloc=pymalloc, ucs4=ucs4 - ), - ) + abis.append(f"cp{version}{threading}") + abis.insert(0, f"cp{version}{threading}{debug}{pymalloc}{ucs4}") return abis @@ -211,11 +228,14 @@ def cpython_tags( for abi in abis: for platform_ in platforms: yield Tag(interpreter, abi, platform_) - if _abi3_applies(python_version): + + threading = _is_threaded_cpython(abis) + use_abi3 = _abi3_applies(python_version, threading) + if use_abi3: yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms) yield from (Tag(interpreter, "none", platform_) for platform_ in platforms) - if _abi3_applies(python_version): + if use_abi3: for minor_version in range(python_version[1] - 1, 1, -1): for platform_ in platforms: interpreter = "cp{version}".format( @@ -224,10 +244,45 @@ def cpython_tags( yield Tag(interpreter, "abi3", platform_) -def _generic_abi() -> Iterator[str]: - abi = sysconfig.get_config_var("SOABI") - if abi: - yield _normalize_string(abi) +def _generic_abi() -> List[str]: + """ + Return the ABI tag based on EXT_SUFFIX. + """ + # The following are examples of `EXT_SUFFIX`. + # We want to keep the parts which are related to the ABI and remove the + # parts which are related to the platform: + # - linux: '.cpython-310-x86_64-linux-gnu.so' => cp310 + # - mac: '.cpython-310-darwin.so' => cp310 + # - win: '.cp310-win_amd64.pyd' => cp310 + # - win: '.pyd' => cp37 (uses _cpython_abis()) + # - pypy: '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73 + # - graalpy: '.graalpy-38-native-x86_64-darwin.dylib' + # => graalpy_38_native + + ext_suffix = _get_config_var("EXT_SUFFIX", warn=True) + if not isinstance(ext_suffix, str) or ext_suffix[0] != ".": + raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')") + parts = ext_suffix.split(".") + if len(parts) < 3: + # CPython3.7 and earlier uses ".pyd" on Windows. + return _cpython_abis(sys.version_info[:2]) + soabi = parts[1] + if soabi.startswith("cpython"): + # non-windows + abi = "cp" + soabi.split("-")[1] + elif soabi.startswith("cp"): + # windows + abi = soabi.split("-")[0] + elif soabi.startswith("pypy"): + abi = "-".join(soabi.split("-")[:2]) + elif soabi.startswith("graalpy"): + abi = "-".join(soabi.split("-")[:3]) + elif soabi: + # pyston, ironpython, others? 
+ abi = soabi + else: + return [] + return [_normalize_string(abi)] def generic_tags( @@ -251,8 +306,9 @@ def generic_tags( interpreter = "".join([interp_name, interp_version]) if abis is None: abis = _generic_abi() + else: + abis = list(abis) platforms = list(platforms or platform_tags()) - abis = list(abis) if "none" not in abis: abis.append("none") for abi in abis: @@ -356,6 +412,22 @@ def mac_platforms( version_str, _, cpu_arch = platform.mac_ver() if version is None: version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + if version == (10, 16): + # When built against an older macOS SDK, Python will report macOS 10.16 + # instead of the real version. + version_str = subprocess.run( + [ + sys.executable, + "-sS", + "-c", + "import platform; print(platform.mac_ver()[0])", + ], + check=True, + env={"SYSTEM_VERSION_COMPAT": "0"}, + stdout=subprocess.PIPE, + text=True, + ).stdout + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) else: version = version if arch is None: @@ -416,15 +488,21 @@ def mac_platforms( def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]: linux = _normalize_string(sysconfig.get_platform()) + if not linux.startswith("linux_"): + # we should never be here, just yield the sysconfig one and return + yield linux + return if is_32bit: if linux == "linux_x86_64": linux = "linux_i686" elif linux == "linux_aarch64": - linux = "linux_armv7l" + linux = "linux_armv8l" _, arch = linux.split("_", 1) - yield from _manylinux.platform_tags(linux, arch) - yield from _musllinux.platform_tags(arch) - yield linux + archs = {"armv8l": ["armv8l", "armv7l"]}.get(arch, [arch]) + yield from _manylinux.platform_tags(archs) + yield from _musllinux.platform_tags(archs) + for arch in archs: + yield f"linux_{arch}" def _generic_platforms() -> Iterator[str]: @@ -446,6 +524,9 @@ def platform_tags() -> Iterator[str]: def interpreter_name() -> str: """ Returns the name of the running interpreter. + + Some implementations have a reserved, two-letter abbreviation which will + be returned when appropriate. """ name = sys.implementation.name return INTERPRETER_SHORT_NAMES.get(name) or name @@ -482,6 +563,9 @@ def sys_tags(*, warn: bool = False) -> Iterator[Tag]: yield from generic_tags() if interp_name == "pp": - yield from compatible_tags(interpreter="pp3") + interp = "pp3" + elif interp_name == "cp": + interp = "cp" + interpreter_version(warn=warn) else: - yield from compatible_tags() + interp = None + yield from compatible_tags(interpreter=interp) diff --git a/src/pip/_vendor/packaging/utils.py b/src/pip/_vendor/packaging/utils.py index bab11b80c60..c2c2f75aa80 100644 --- a/src/pip/_vendor/packaging/utils.py +++ b/src/pip/_vendor/packaging/utils.py @@ -12,6 +12,12 @@ NormalizedName = NewType("NormalizedName", str) +class InvalidName(ValueError): + """ + An invalid distribution name; users should refer to the packaging user guide. + """ + + class InvalidWheelFilename(ValueError): """ An invalid wheel filename was found, users should refer to PEP 427. @@ -24,18 +30,31 @@ class InvalidSdistFilename(ValueError): """ +# Core metadata spec for `Name` +_validate_regex = re.compile( + r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE +) _canonicalize_regex = re.compile(r"[-_.]+") +_normalized_regex = re.compile(r"^([a-z0-9]|[a-z0-9]([a-z0-9-](?!--))*[a-z0-9])$") # PEP 427: The build number must start with a digit. 
_build_tag_regex = re.compile(r"(\d+)(.*)") -def canonicalize_name(name: str) -> NormalizedName: +def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName: + if validate and not _validate_regex.match(name): + raise InvalidName(f"name is invalid: {name!r}") # This is taken from PEP 503. value = _canonicalize_regex.sub("-", name).lower() return cast(NormalizedName, value) -def canonicalize_version(version: Union[Version, str]) -> str: +def is_normalized_name(name: str) -> bool: + return _normalized_regex.match(name) is not None + + +def canonicalize_version( + version: Union[Version, str], *, strip_trailing_zero: bool = True +) -> str: """ This is very similar to Version.__str__, but has one subtle difference with the way it handles the release segment. @@ -56,8 +75,11 @@ def canonicalize_version(version: Union[Version, str]) -> str: parts.append(f"{parsed.epoch}!") # Release segment - # NB: This strips trailing '.0's to normalize - parts.append(re.sub(r"(\.0)+$", "", ".".join(str(x) for x in parsed.release))) + release_segment = ".".join(str(x) for x in parsed.release) + if strip_trailing_zero: + # NB: This strips trailing '.0's to normalize + release_segment = re.sub(r"(\.0)+$", "", release_segment) + parts.append(release_segment) # Pre-release if parsed.pre is not None: @@ -95,11 +117,18 @@ def parse_wheel_filename( parts = filename.split("-", dashes - 2) name_part = parts[0] - # See PEP 427 for the rules on escaping the project name + # See PEP 427 for the rules on escaping the project name. if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None: raise InvalidWheelFilename(f"Invalid project name: {filename}") name = canonicalize_name(name_part) - version = Version(parts[1]) + + try: + version = Version(parts[1]) + except InvalidVersion as e: + raise InvalidWheelFilename( + f"Invalid wheel filename (invalid version): {filename}" + ) from e + if dashes == 5: build_part = parts[2] build_match = _build_tag_regex.match(build_part) @@ -132,5 +161,12 @@ def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]: raise InvalidSdistFilename(f"Invalid sdist filename: {filename}") name = canonicalize_name(name_part) - version = Version(version_part) + + try: + version = Version(version_part) + except InvalidVersion as e: + raise InvalidSdistFilename( + f"Invalid sdist filename (invalid version): {filename}" + ) from e + return (name, version) diff --git a/src/pip/_vendor/packaging/version.py b/src/pip/_vendor/packaging/version.py index de9a09a4ed3..6978e2843fa 100644 --- a/src/pip/_vendor/packaging/version.py +++ b/src/pip/_vendor/packaging/version.py @@ -1,64 +1,71 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. +""" +.. 
testsetup:: + + from pip._vendor.packaging.version import parse, Version +""" -import collections import itertools import re -import warnings -from typing import Callable, Iterator, List, Optional, SupportsInt, Tuple, Union +from typing import Any, Callable, NamedTuple, Optional, SupportsInt, Tuple, Union from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType -__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"] +__all__ = ["VERSION_PATTERN", "parse", "Version", "InvalidVersion"] -InfiniteTypes = Union[InfinityType, NegativeInfinityType] -PrePostDevType = Union[InfiniteTypes, Tuple[str, int]] -SubLocalType = Union[InfiniteTypes, int, str] -LocalType = Union[ +LocalType = Tuple[Union[int, str], ...] + +CmpPrePostDevType = Union[InfinityType, NegativeInfinityType, Tuple[str, int]] +CmpLocalType = Union[ NegativeInfinityType, - Tuple[ - Union[ - SubLocalType, - Tuple[SubLocalType, str], - Tuple[NegativeInfinityType, SubLocalType], - ], - ..., - ], + Tuple[Union[Tuple[int, str], Tuple[NegativeInfinityType, Union[int, str]]], ...], ] CmpKey = Tuple[ - int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType -] -LegacyCmpKey = Tuple[int, Tuple[str, ...]] -VersionComparisonMethod = Callable[ - [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool + int, + Tuple[int, ...], + CmpPrePostDevType, + CmpPrePostDevType, + CmpPrePostDevType, + CmpLocalType, ] +VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool] -_Version = collections.namedtuple( - "_Version", ["epoch", "release", "dev", "pre", "post", "local"] -) +class _Version(NamedTuple): + epoch: int + release: Tuple[int, ...] + dev: Optional[Tuple[str, int]] + pre: Optional[Tuple[str, int]] + post: Optional[Tuple[str, int]] + local: Optional[LocalType] -def parse(version: str) -> Union["LegacyVersion", "Version"]: - """ - Parse the given version string and return either a :class:`Version` object - or a :class:`LegacyVersion` object depending on if the given version is - a valid PEP 440 version or a legacy version. + +def parse(version: str) -> "Version": + """Parse the given version string. + + >>> parse('1.0.dev1') + + + :param version: The version string to parse. + :raises InvalidVersion: When the version string is not a valid version. """ - try: - return Version(version) - except InvalidVersion: - return LegacyVersion(version) + return Version(version) class InvalidVersion(ValueError): - """ - An invalid version was found, users should refer to PEP 440. + """Raised when a version string is not a valid version. + + >>> Version("invalid") + Traceback (most recent call last): + ... + packaging.version.InvalidVersion: Invalid version: 'invalid' """ class _BaseVersion: - _key: Union[CmpKey, LegacyCmpKey] + _key: Tuple[Any, ...] 
def __hash__(self) -> int: return hash(self._key) @@ -103,133 +110,16 @@ def __ne__(self, other: object) -> bool: return self._key != other._key -class LegacyVersion(_BaseVersion): - def __init__(self, version: str) -> None: - self._version = str(version) - self._key = _legacy_cmpkey(self._version) - - warnings.warn( - "Creating a LegacyVersion has been deprecated and will be " - "removed in the next major release", - DeprecationWarning, - ) - - def __str__(self) -> str: - return self._version - - def __repr__(self) -> str: - return f"" - - @property - def public(self) -> str: - return self._version - - @property - def base_version(self) -> str: - return self._version - - @property - def epoch(self) -> int: - return -1 - - @property - def release(self) -> None: - return None - - @property - def pre(self) -> None: - return None - - @property - def post(self) -> None: - return None - - @property - def dev(self) -> None: - return None - - @property - def local(self) -> None: - return None - - @property - def is_prerelease(self) -> bool: - return False - - @property - def is_postrelease(self) -> bool: - return False - - @property - def is_devrelease(self) -> bool: - return False - - -_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE) - -_legacy_version_replacement_map = { - "pre": "c", - "preview": "c", - "-": "final-", - "rc": "c", - "dev": "@", -} - - -def _parse_version_parts(s: str) -> Iterator[str]: - for part in _legacy_version_component_re.split(s): - part = _legacy_version_replacement_map.get(part, part) - - if not part or part == ".": - continue - - if part[:1] in "0123456789": - # pad for numeric comparison - yield part.zfill(8) - else: - yield "*" + part - - # ensure that alpha/beta/candidate are before final - yield "*final" - - -def _legacy_cmpkey(version: str) -> LegacyCmpKey: - - # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch - # greater than or equal to 0. This will effectively put the LegacyVersion, - # which uses the defacto standard originally implemented by setuptools, - # as before all PEP 440 versions. - epoch = -1 - - # This scheme is taken from pkg_resources.parse_version setuptools prior to - # it's adoption of the packaging library. - parts: List[str] = [] - for part in _parse_version_parts(version.lower()): - if part.startswith("*"): - # remove "-" before a prerelease tag - if part < "*final": - while parts and parts[-1] == "*final-": - parts.pop() - - # remove trailing zeros from each series of numeric parts - while parts and parts[-1] == "00000000": - parts.pop() - - parts.append(part) - - return epoch, tuple(parts) - - # Deliberately not anchored to the start and end of the string, to make it # easier for 3rd party code to reuse -VERSION_PATTERN = r""" +_VERSION_PATTERN = r""" v? (?: (?:(?P[0-9]+)!)? # epoch (?P[0-9]+(?:\.[0-9]+)*) # release segment (?P
<pre>                                          # pre-release
             [-_\.]?
-            (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
+            (?P<pre_l>alpha|a|beta|b|preview|pre|c|rc)
             [-_\.]?
             (?P<pre_n>[0-9]+)?
         )?
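# A quick sketch (not part of the diff) of what the reworked pre-release
# alternation above accepts: the alternate spellings parse successfully and
# are normalized, so str() and equality agree across spellings.
from pip._vendor.packaging.version import Version

assert str(Version("1.0alpha1")) == "1.0a1"
assert str(Version("1.0-preview.2")) == "1.0rc2"
assert Version("1.0alpha1") == Version("1.0a1")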
@@ -253,12 +143,56 @@ def _legacy_cmpkey(version: str) -> LegacyCmpKey:
    (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
 """
 
+VERSION_PATTERN = _VERSION_PATTERN
+"""
+A string containing the regular expression used to match a valid version.
+
+The pattern is not anchored at either end, and is intended for embedding in larger
+expressions (for example, matching a version number as part of a file name). The
+regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE``
+flags set.
+
+:meta hide-value:
+"""
+
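# A minimal sketch (not part of the diff) of embedding the unanchored
# VERSION_PATTERN in a larger expression, as the note above intends; the
# archive naming and the "ver" group are hypothetical.
import re

from pip._vendor.packaging.version import VERSION_PATTERN

archive_re = re.compile(
    r"^mypkg-(?P<ver>" + VERSION_PATTERN + r")\.tar\.gz$",
    re.VERBOSE | re.IGNORECASE,
)
match = archive_re.match("mypkg-1.0.dev1.tar.gz")
assert match is not None and match.group("ver") == "1.0.dev1"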
 
 class Version(_BaseVersion):
+    """This class abstracts handling of a project's versions.
+
+    A :class:`Version` instance is comparison aware and can be compared and
+    sorted using the standard Python interfaces.
+
+    >>> v1 = Version("1.0a5")
+    >>> v2 = Version("1.0")
+    >>> v1
+    <Version('1.0a5')>
+    >>> v2
+    <Version('1.0')>
+    >>> v1 < v2
+    True
+    >>> v1 == v2
+    False
+    >>> v1 > v2
+    False
+    >>> v1 >= v2
+    False
+    >>> v1 <= v2
+    True
+    """
 
     _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
+    _key: CmpKey
 
     def __init__(self, version: str) -> None:
+        """Initialize a Version object.
+
+        :param version:
+            The string representation of a version which will be parsed and normalized
+            before use.
+        :raises InvalidVersion:
+            If the ``version`` does not conform to PEP 440 in any way then this
+            exception will be raised.
+        """
 
         # Validate the version and parse it into pieces
         match = self._regex.search(version)
@@ -288,9 +222,19 @@ def __init__(self, version: str) -> None:
         )
 
     def __repr__(self) -> str:
+        """A representation of the Version that shows all internal state.
+
+        >>> Version('1.0.0')
+        <Version('1.0.0')>
+        """
         return f"<Version('{self}')>"
 
     def __str__(self) -> str:
+        """A string representation of the version that can be rounded-tripped.
+
+        >>> str(Version("1.0a5"))
+        '1.0a5'
+        """
         parts = []
 
         # Epoch
@@ -320,29 +264,77 @@ def __str__(self) -> str:
 
     @property
     def epoch(self) -> int:
-        _epoch: int = self._version.epoch
-        return _epoch
+        """The epoch of the version.
+
+        >>> Version("2.0.0").epoch
+        0
+        >>> Version("1!2.0.0").epoch
+        1
+        """
+        return self._version.epoch
 
     @property
     def release(self) -> Tuple[int, ...]:
-        _release: Tuple[int, ...] = self._version.release
-        return _release
+        """The components of the "release" segment of the version.
+
+        >>> Version("1.2.3").release
+        (1, 2, 3)
+        >>> Version("2.0.0").release
+        (2, 0, 0)
+        >>> Version("1!2.0.0.post0").release
+        (2, 0, 0)
+
+        Includes trailing zeroes but not the epoch or any pre-release / development /
+        post-release suffixes.
+        """
+        return self._version.release
 
     @property
     def pre(self) -> Optional[Tuple[str, int]]:
-        _pre: Optional[Tuple[str, int]] = self._version.pre
-        return _pre
+        """The pre-release segment of the version.
+
+        >>> print(Version("1.2.3").pre)
+        None
+        >>> Version("1.2.3a1").pre
+        ('a', 1)
+        >>> Version("1.2.3b1").pre
+        ('b', 1)
+        >>> Version("1.2.3rc1").pre
+        ('rc', 1)
+        """
+        return self._version.pre
 
     @property
     def post(self) -> Optional[int]:
+        """The post-release number of the version.
+
+        >>> print(Version("1.2.3").post)
+        None
+        >>> Version("1.2.3.post1").post
+        1
+        """
         return self._version.post[1] if self._version.post else None
 
     @property
     def dev(self) -> Optional[int]:
+        """The development number of the version.
+
+        >>> print(Version("1.2.3").dev)
+        None
+        >>> Version("1.2.3.dev1").dev
+        1
+        """
         return self._version.dev[1] if self._version.dev else None
 
     @property
     def local(self) -> Optional[str]:
+        """The local version segment of the version.
+
+        >>> print(Version("1.2.3").local)
+        None
+        >>> Version("1.2.3+abc").local
+        'abc'
+        """
         if self._version.local:
             return ".".join(str(x) for x in self._version.local)
         else:
@@ -350,10 +342,31 @@ def local(self) -> Optional[str]:
 
     @property
     def public(self) -> str:
+        """The public portion of the version.
+
+        >>> Version("1.2.3").public
+        '1.2.3'
+        >>> Version("1.2.3+abc").public
+        '1.2.3'
+        >>> Version("1.2.3+abc.dev1").public
+        '1.2.3'
+        """
         return str(self).split("+", 1)[0]
 
     @property
     def base_version(self) -> str:
+        """The "base version" of the version.
+
+        >>> Version("1.2.3").base_version
+        '1.2.3'
+        >>> Version("1.2.3+abc").base_version
+        '1.2.3'
+        >>> Version("1!1.2.3+abc.dev1").base_version
+        '1!1.2.3'
+
+        The "base version" is the public version of the project without any pre or post
+        release markers.
+        """
         parts = []
 
         # Epoch
@@ -367,31 +380,77 @@ def base_version(self) -> str:
 
     @property
     def is_prerelease(self) -> bool:
+        """Whether this version is a pre-release.
+
+        >>> Version("1.2.3").is_prerelease
+        False
+        >>> Version("1.2.3a1").is_prerelease
+        True
+        >>> Version("1.2.3b1").is_prerelease
+        True
+        >>> Version("1.2.3rc1").is_prerelease
+        True
+        >>> Version("1.2.3dev1").is_prerelease
+        True
+        """
         return self.dev is not None or self.pre is not None
 
     @property
     def is_postrelease(self) -> bool:
+        """Whether this version is a post-release.
+
+        >>> Version("1.2.3").is_postrelease
+        False
+        >>> Version("1.2.3.post1").is_postrelease
+        True
+        """
         return self.post is not None
 
     @property
     def is_devrelease(self) -> bool:
+        """Whether this version is a development release.
+
+        >>> Version("1.2.3").is_devrelease
+        False
+        >>> Version("1.2.3.dev1").is_devrelease
+        True
+        """
         return self.dev is not None
 
     @property
     def major(self) -> int:
+        """The first item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").major
+        1
+        """
         return self.release[0] if len(self.release) >= 1 else 0
 
     @property
     def minor(self) -> int:
+        """The second item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").minor
+        2
+        >>> Version("1").minor
+        0
+        """
         return self.release[1] if len(self.release) >= 2 else 0
 
     @property
     def micro(self) -> int:
+        """The third item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").micro
+        3
+        >>> Version("1").micro
+        0
+        """
         return self.release[2] if len(self.release) >= 3 else 0
 
 
 def _parse_letter_version(
-    letter: str, number: Union[str, bytes, SupportsInt]
+    letter: Optional[str], number: Union[str, bytes, SupportsInt, None]
 ) -> Optional[Tuple[str, int]]:
 
     if letter:
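The widened signature in the hunk above reflects that both the letter and the number may be absent; when a letter is present, the helper canonicalizes alternate spellings (``alpha`` to ``a``, ``beta`` to ``b``, ``c``/``pre``/``preview`` to ``rc``, ``rev``/``r`` to ``post``), which surfaces in the normalized string form. A sketch using the standalone ``packaging`` distribution::

    from packaging.version import Version

    # Each alternate spelling parses, then prints in its canonical form.
    for raw in ("1.0alpha1", "1.0beta2", "1.0pre3", "1.0rev4"):
        print(raw, "->", Version(raw))
    # 1.0alpha1 -> 1.0a1
    # 1.0beta2 -> 1.0b2
    # 1.0pre3 -> 1.0rc3
    # 1.0rev4 -> 1.0.post4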
@@ -429,7 +488,7 @@ def _parse_letter_version(
 _local_version_separators = re.compile(r"[\._-]")
 
 
-def _parse_local_version(local: str) -> Optional[LocalType]:
+def _parse_local_version(local: Optional[str]) -> Optional[LocalType]:
     """
     Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
     """
@@ -447,7 +506,7 @@ def _cmpkey(
     pre: Optional[Tuple[str, int]],
     post: Optional[Tuple[str, int]],
     dev: Optional[Tuple[str, int]],
-    local: Optional[Tuple[SubLocalType]],
+    local: Optional[LocalType],
 ) -> CmpKey:
 
     # When we compare a release version, we want to compare it with all of the
@@ -464,7 +523,7 @@ def _cmpkey(
     # if there is not a pre or a post segment. If we have one of those then
     # the normal sorting rules will handle this case correctly.
     if pre is None and post is None and dev is not None:
-        _pre: PrePostDevType = NegativeInfinity
+        _pre: CmpPrePostDevType = NegativeInfinity
     # Versions without a pre-release (except as noted above) should sort after
     # those with one.
     elif pre is None:
@@ -474,21 +533,21 @@ def _cmpkey(
 
     # Versions without a post segment should sort before those with one.
     if post is None:
-        _post: PrePostDevType = NegativeInfinity
+        _post: CmpPrePostDevType = NegativeInfinity
 
     else:
         _post = post
 
     # Versions without a development segment should sort after those with one.
     if dev is None:
-        _dev: PrePostDevType = Infinity
+        _dev: CmpPrePostDevType = Infinity
 
     else:
         _dev = dev
 
     if local is None:
         # Versions without a local segment should sort before those with one.
-        _local: LocalType = NegativeInfinity
+        _local: CmpLocalType = NegativeInfinity
     else:
         # Versions with a local segment need that segment parsed to implement
         # the sorting rules in PEP440.
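The sentinel values substituted above are what give ``Version`` its PEP 440 total order; the edge cases the comments describe can be checked directly. A sketch, assuming the standalone ``packaging`` distribution::

    from packaging.version import Version

    # A dev release sorts before the release itself and its pre-releases ...
    assert Version("1.0.dev1") < Version("1.0a1") < Version("1.0")
    # ... but a dev release of a post release sorts after the base version.
    assert Version("1.0") < Version("1.0.post1.dev1") < Version("1.0.post1")
    # Versions without a local segment sort before those with one, and
    # numeric local components compare as integers, not strings.
    assert Version("1.0") < Version("1.0+2") < Version("1.0+10")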
diff --git a/src/pip/_vendor/pyparsing/LICENSE b/src/pip/_vendor/pyparsing/LICENSE
deleted file mode 100644
index 1bf98523e33..00000000000
--- a/src/pip/_vendor/pyparsing/LICENSE
+++ /dev/null
@@ -1,18 +0,0 @@
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/src/pip/_vendor/pyparsing/__init__.py b/src/pip/_vendor/pyparsing/__init__.py
deleted file mode 100644
index 83937ebf1f1..00000000000
--- a/src/pip/_vendor/pyparsing/__init__.py
+++ /dev/null
@@ -1,325 +0,0 @@
-# module pyparsing.py
-#
-# Copyright (c) 2003-2022  Paul T. McGuire
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-
-__doc__ = """
-pyparsing module - Classes and methods to define and execute parsing grammars
-=============================================================================
-
-The pyparsing module is an alternative approach to creating and
-executing simple grammars, vs. the traditional lex/yacc approach, or the
-use of regular expressions.  With pyparsing, you don't need to learn
-a new syntax for defining grammars or matching expressions - the parsing
-module provides a library of classes that you use to construct the
-grammar directly in Python.
-
-Here is a program to parse "Hello, World!" (or any greeting of the form
-``", !"``), built up using :class:`Word`,
-:class:`Literal`, and :class:`And` elements
-(the :meth:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
-and the strings are auto-converted to :class:`Literal` expressions)::
-
-    from pip._vendor.pyparsing import Word, alphas
-
-    # define grammar of a greeting
-    greet = Word(alphas) + "," + Word(alphas) + "!"
-
-    hello = "Hello, World!"
-    print(hello, "->", greet.parse_string(hello))
-
-The program outputs the following::
-
-    Hello, World! -> ['Hello', ',', 'World', '!']
-
-The Python representation of the grammar is quite readable, owing to the
-self-explanatory class names, and the use of :class:`'+'<And>`,
-:class:`'|'<MatchFirst>`, :class:`'^'<Or>` and :class:`'&'<Each>` operators.
-
-The :class:`ParseResults` object returned from
-:class:`ParserElement.parse_string` can be
-accessed as a nested list, a dictionary, or an object with named
-attributes.
-
-The pyparsing module handles some of the problems that are typically
-vexing when writing text parsers:
-
-  - extra or missing whitespace (the above program will also handle
-    "Hello,World!", "Hello  ,  World  !", etc.)
-  - quoted strings
-  - embedded comments
-
-
-Getting Started -
------------------
-Visit the classes :class:`ParserElement` and :class:`ParseResults` to
-see the base classes that most other pyparsing
-classes inherit from. Use the docstrings for examples of how to:
-
- - construct literal match expressions from :class:`Literal` and
-   :class:`CaselessLiteral` classes
- - construct character word-group expressions using the :class:`Word`
-   class
- - see how to create repetitive expressions using :class:`ZeroOrMore`
-   and :class:`OneOrMore` classes
- - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
-   and :class:`'&'<Each>` operators to combine simple expressions into
-   more complex ones
- - associate names with your parsed results using
-   :class:`ParserElement.set_results_name`
- - access the parsed data, which is returned as a :class:`ParseResults`
-   object
- - find some helpful expression short-cuts like :class:`DelimitedList`
-   and :class:`one_of`
- - find more useful common expressions in the :class:`pyparsing_common`
-   namespace class
-"""
-from typing import NamedTuple
-
-
-class version_info(NamedTuple):
-    major: int
-    minor: int
-    micro: int
-    releaselevel: str
-    serial: int
-
-    @property
-    def __version__(self):
-        return (
-            f"{self.major}.{self.minor}.{self.micro}"
-            + (
-                f"{'r' if self.releaselevel[0] == 'c' else ''}{self.releaselevel[0]}{self.serial}",
-                "",
-            )[self.releaselevel == "final"]
-        )
-
-    def __str__(self):
-        return f"{__name__} {self.__version__} / {__version_time__}"
-
-    def __repr__(self):
-        return f"{__name__}.{type(self).__name__}({', '.join('{}={!r}'.format(*nv) for nv in zip(self._fields, self))})"
-
-
-__version_info__ = version_info(3, 1, 2, "final", 1)
-__version_time__ = "06 Mar 2024 07:08 UTC"
-__version__ = __version_info__.__version__
-__versionTime__ = __version_time__
-__author__ = "Paul McGuire "
-
-from .util import *
-from .exceptions import *
-from .actions import *
-from .core import __diag__, __compat__
-from .results import *
-from .core import *  # type: ignore[misc, assignment]
-from .core import _builtin_exprs as core_builtin_exprs
-from .helpers import *  # type: ignore[misc, assignment]
-from .helpers import _builtin_exprs as helper_builtin_exprs
-
-from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
-from .testing import pyparsing_test as testing
-from .common import (
-    pyparsing_common as common,
-    _builtin_exprs as common_builtin_exprs,
-)
-
-# define backward compat synonyms
-if "pyparsing_unicode" not in globals():
-    pyparsing_unicode = unicode  # type: ignore[misc]
-if "pyparsing_common" not in globals():
-    pyparsing_common = common  # type: ignore[misc]
-if "pyparsing_test" not in globals():
-    pyparsing_test = testing  # type: ignore[misc]
-
-core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs
-
-
-__all__ = [
-    "__version__",
-    "__version_time__",
-    "__author__",
-    "__compat__",
-    "__diag__",
-    "And",
-    "AtLineStart",
-    "AtStringStart",
-    "CaselessKeyword",
-    "CaselessLiteral",
-    "CharsNotIn",
-    "CloseMatch",
-    "Combine",
-    "DelimitedList",
-    "Dict",
-    "Each",
-    "Empty",
-    "FollowedBy",
-    "Forward",
-    "GoToColumn",
-    "Group",
-    "IndentedBlock",
-    "Keyword",
-    "LineEnd",
-    "LineStart",
-    "Literal",
-    "Located",
-    "PrecededBy",
-    "MatchFirst",
-    "NoMatch",
-    "NotAny",
-    "OneOrMore",
-    "OnlyOnce",
-    "OpAssoc",
-    "Opt",
-    "Optional",
-    "Or",
-    "ParseBaseException",
-    "ParseElementEnhance",
-    "ParseException",
-    "ParseExpression",
-    "ParseFatalException",
-    "ParseResults",
-    "ParseSyntaxException",
-    "ParserElement",
-    "PositionToken",
-    "QuotedString",
-    "RecursiveGrammarException",
-    "Regex",
-    "SkipTo",
-    "StringEnd",
-    "StringStart",
-    "Suppress",
-    "Token",
-    "TokenConverter",
-    "White",
-    "Word",
-    "WordEnd",
-    "WordStart",
-    "ZeroOrMore",
-    "Char",
-    "alphanums",
-    "alphas",
-    "alphas8bit",
-    "any_close_tag",
-    "any_open_tag",
-    "autoname_elements",
-    "c_style_comment",
-    "col",
-    "common_html_entity",
-    "condition_as_parse_action",
-    "counted_array",
-    "cpp_style_comment",
-    "dbl_quoted_string",
-    "dbl_slash_comment",
-    "delimited_list",
-    "dict_of",
-    "empty",
-    "hexnums",
-    "html_comment",
-    "identchars",
-    "identbodychars",
-    "infix_notation",
-    "java_style_comment",
-    "line",
-    "line_end",
-    "line_start",
-    "lineno",
-    "make_html_tags",
-    "make_xml_tags",
-    "match_only_at_col",
-    "match_previous_expr",
-    "match_previous_literal",
-    "nested_expr",
-    "null_debug_action",
-    "nums",
-    "one_of",
-    "original_text_for",
-    "printables",
-    "punc8bit",
-    "pyparsing_common",
-    "pyparsing_test",
-    "pyparsing_unicode",
-    "python_style_comment",
-    "quoted_string",
-    "remove_quotes",
-    "replace_with",
-    "replace_html_entity",
-    "rest_of_line",
-    "sgl_quoted_string",
-    "srange",
-    "string_end",
-    "string_start",
-    "token_map",
-    "trace_parse_action",
-    "ungroup",
-    "unicode_set",
-    "unicode_string",
-    "with_attribute",
-    "with_class",
-    # pre-PEP8 compatibility names
-    "__versionTime__",
-    "anyCloseTag",
-    "anyOpenTag",
-    "cStyleComment",
-    "commonHTMLEntity",
-    "conditionAsParseAction",
-    "countedArray",
-    "cppStyleComment",
-    "dblQuotedString",
-    "dblSlashComment",
-    "delimitedList",
-    "dictOf",
-    "htmlComment",
-    "indentedBlock",
-    "infixNotation",
-    "javaStyleComment",
-    "lineEnd",
-    "lineStart",
-    "locatedExpr",
-    "makeHTMLTags",
-    "makeXMLTags",
-    "matchOnlyAtCol",
-    "matchPreviousExpr",
-    "matchPreviousLiteral",
-    "nestedExpr",
-    "nullDebugAction",
-    "oneOf",
-    "opAssoc",
-    "originalTextFor",
-    "pythonStyleComment",
-    "quotedString",
-    "removeQuotes",
-    "replaceHTMLEntity",
-    "replaceWith",
-    "restOfLine",
-    "sglQuotedString",
-    "stringEnd",
-    "stringStart",
-    "tokenMap",
-    "traceParseAction",
-    "unicodeString",
-    "withAttribute",
-    "withClass",
-    "common",
-    "unicode",
-    "testing",
-]
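The whole vendored package can be deleted because ``packaging`` 22.0 replaced its pyparsing-based requirement and marker parsing with a hand-written tokenizer and parser, so nothing left under ``pip._vendor`` imports pyparsing. A sketch of the replacement path, shown with the standalone ``packaging`` distribution rather than pip's vendored copy::

    from packaging.requirements import Requirement

    # Requirement/marker strings now parse without any pyparsing grammar.
    req = Requirement('requests[security]>=2.8.1; python_version < "3.9"')
    print(req.name)       # requests
    print(req.extras)     # {'security'}
    print(req.specifier)  # >=2.8.1
    print(req.marker)     # python_version < "3.9"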
diff --git a/src/pip/_vendor/pyparsing/actions.py b/src/pip/_vendor/pyparsing/actions.py
deleted file mode 100644
index ce51b3957ca..00000000000
--- a/src/pip/_vendor/pyparsing/actions.py
+++ /dev/null
@@ -1,206 +0,0 @@
-# actions.py
-
-from .exceptions import ParseException
-from .util import col, replaced_by_pep8
-
-
-class OnlyOnce:
-    """
-    Wrapper for parse actions, to ensure they are only called once.
-    """
-
-    def __init__(self, method_call):
-        from .core import _trim_arity
-
-        self.callable = _trim_arity(method_call)
-        self.called = False
-
-    def __call__(self, s, l, t):
-        if not self.called:
-            results = self.callable(s, l, t)
-            self.called = True
-            return results
-        raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")
-
-    def reset(self):
-        """
-        Allow the associated parse action to be called once more.
-        """
-
-        self.called = False
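``OnlyOnce`` above is pyparsing-specific only in its ``(s, l, t)`` call shape; the call-once-with-reset idea it implements is plain Python. A standalone sketch of the same behavior (the ``CallOnce`` name is illustrative, not part of any library)::

    class CallOnce:
        """Invoke the wrapped callable at most once until reset() is called."""

        def __init__(self, fn):
            self.fn = fn
            self.called = False

        def __call__(self, *args, **kwargs):
            if self.called:
                raise RuntimeError("CallOnce object called multiple times w/out reset")
            # Mirror OnlyOnce: only mark as called once the call succeeds.
            result = self.fn(*args, **kwargs)
            self.called = True
            return result

        def reset(self):
            # Allow the wrapped callable to run once more.
            self.called = False

    announce = CallOnce(print)
    announce("runs the first time")
    announce.reset()
    announce("runs again after a reset")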
-
-
-def match_only_at_col(n):
-    """
-    Helper method for defining parse actions that require matching at
-    a specific column in the input text.
-    """
-
-    def verify_col(strg, locn, toks):
-        if col(locn, strg) != n:
-            raise ParseException(strg, locn, f"matched token not at column {n}")
-
-    return verify_col
-
-
-def replace_with(repl_str):
-    """
-    Helper method for common parse actions that simply return
-    a literal value.  Especially useful when used with
-    :class:`transform_string<ParserElement.transform_string>` ().
-
-    Example::
-
-        num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
-        na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
-        term = na | num
-
-        term[1, ...].parse_string("324 234 N/A 234") # -> [324, 234, nan, 234]
-    """
-    return lambda s, l, t: [repl_str]
-
-
-def remove_quotes(s, l, t):
-    """
-    Helper parse action for removing quotation marks from parsed
-    quoted strings.
-
-    Example::
-
-        # by default, quotation marks are included in parsed results
-        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
-
-        # use remove_quotes to strip quotation marks from parsed results
-        quoted_string.set_parse_action(remove_quotes)
-        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
-    """
-    return t[0][1:-1]
-
-
-def with_attribute(*args, **attr_dict):
-    """
-    Helper to create a validating parse action to be used with start
-    tags created with :class:`make_xml_tags` or
-    :class:`make_html_tags`. Use ``with_attribute`` to qualify
-    a starting tag with a required attribute value, to avoid false
-    matches on common tags such as ``<TD>`` or ``<DIV>``.
-
-    Call ``with_attribute`` with a series of attribute names and
-    values. Specify the list of filter attributes names and values as:
-
-    - keyword arguments, as in ``(align="right")``, or
-    - as an explicit dict with ``**`` operator, when an attribute
-      name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
-    - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``
-
-    For attribute names with a namespace prefix, you must use the second
-    form.  Attribute names are matched insensitive to upper/lower case.
-
-    If just testing for ``class`` (with or without a namespace), use
-    :class:`with_class`.
-
-    To verify that the attribute exists, but without specifying a value,
-    pass ``with_attribute.ANY_VALUE`` as the value.
-
-    Example::
-
-        html = '''
-            <div>
-            Some text
-            <div type="grid">1 4 0 1 0</div>
-            <div type="graph">1,3 2,3 1,1</div>
-            <div>this has no type</div>
-            </div>
-        '''
-        div,div_end = make_html_tags("div")
-
-        # only match div tag having a type attribute with value "grid"
-        div_grid = div().set_parse_action(with_attribute(type="grid"))
-        grid_expr = div_grid + SkipTo(div | div_end)("body")
-        for grid_header in grid_expr.search_string(html):
-            print(grid_header.body)
-
-        # construct a match with any div tag having a type attribute, regardless of the value
-        div_any_type = div().set_parse_action(with_attribute(type=with_attribute.ANY_VALUE))
-        div_expr = div_any_type + SkipTo(div | div_end)("body")
-        for div_header in div_expr.search_string(html):
-            print(div_header.body)
-
-    prints::
-
-        1 4 0 1 0
-
-        1 4 0 1 0
-        1,3 2,3 1,1
-    """
-    if args:
-        attrs = args[:]
-    else:
-        attrs = attr_dict.items()
-    attrs = [(k, v) for k, v in attrs]
-
-    def pa(s, l, tokens):
-        for attrName, attrValue in attrs:
-            if attrName not in tokens:
-                raise ParseException(s, l, "no matching attribute " + attrName)
-            if attrValue != with_attribute.ANY_VALUE and tokens[attrName] != attrValue:
-                raise ParseException(
-                    s,
-                    l,
-                    f"attribute {attrName!r} has value {tokens[attrName]!r}, must be {attrValue!r}",
-                )
-
-    return pa
-
-
-with_attribute.ANY_VALUE = object()  # type: ignore [attr-defined]
-
-
-def with_class(classname, namespace=""):
-    """
-    Simplified version of :class:`with_attribute` when
-    matching on a div class - made difficult because ``class`` is
-    a reserved word in Python.
-
-    Example::
-
-        html = '''
-            <div>
-            Some text
-            <div class="grid">1 4 0 1 0</div>
-            <div class="graph">1,3 2,3 1,1</div>
-            <div>this &lt;div&gt; has no class</div>
-            </div>
- - ''' - div,div_end = make_html_tags("div") - div_grid = div().set_parse_action(with_class("grid")) - - grid_expr = div_grid + SkipTo(div | div_end)("body") - for grid_header in grid_expr.search_string(html): - print(grid_header.body) - - div_any_type = div().set_parse_action(with_class(withAttribute.ANY_VALUE)) - div_expr = div_any_type + SkipTo(div | div_end)("body") - for div_header in div_expr.search_string(html): - print(div_header.body) - - prints:: - - 1 4 0 1 0 - - 1 4 0 1 0 - 1,3 2,3 1,1 - """ - classattr = f"{namespace}:class" if namespace else "class" - return with_attribute(**{classattr: classname}) - - -# pre-PEP8 compatibility symbols -# fmt: off -replaceWith = replaced_by_pep8("replaceWith", replace_with) -removeQuotes = replaced_by_pep8("removeQuotes", remove_quotes) -withAttribute = replaced_by_pep8("withAttribute", with_attribute) -withClass = replaced_by_pep8("withClass", with_class) -matchOnlyAtCol = replaced_by_pep8("matchOnlyAtCol", match_only_at_col) -# fmt: on diff --git a/src/pip/_vendor/pyparsing/common.py b/src/pip/_vendor/pyparsing/common.py deleted file mode 100644 index 74faa46085a..00000000000 --- a/src/pip/_vendor/pyparsing/common.py +++ /dev/null @@ -1,439 +0,0 @@ -# common.py -from .core import * -from .helpers import DelimitedList, any_open_tag, any_close_tag -from datetime import datetime - - -# some other useful expressions - using lower-case class name since we are really using this as a namespace -class pyparsing_common: - """Here are some common low-level expressions that may be useful in - jump-starting parser development: - - - numeric forms (:class:`integers`, :class:`reals`, - :class:`scientific notation`) - - common :class:`programming identifiers` - - network addresses (:class:`MAC`, - :class:`IPv4`, :class:`IPv6`) - - ISO8601 :class:`dates` and - :class:`datetime` - - :class:`UUID` - - :class:`comma-separated list` - - :class:`url` - - Parse actions: - - - :class:`convert_to_integer` - - :class:`convert_to_float` - - :class:`convert_to_date` - - :class:`convert_to_datetime` - - :class:`strip_html_tags` - - :class:`upcase_tokens` - - :class:`downcase_tokens` - - Example:: - - pyparsing_common.number.run_tests(''' - # any int or real number, returned as the appropriate type - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - ''') - - pyparsing_common.fnumber.run_tests(''' - # any int or real number, returned as float - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - ''') - - pyparsing_common.hex_integer.run_tests(''' - # hex numbers - 100 - FF - ''') - - pyparsing_common.fraction.run_tests(''' - # fractions - 1/2 - -3/4 - ''') - - pyparsing_common.mixed_integer.run_tests(''' - # mixed fractions - 1 - 1/2 - -3/4 - 1-3/4 - ''') - - import uuid - pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID)) - pyparsing_common.uuid.run_tests(''' - # uuid - 12345678-1234-5678-1234-567812345678 - ''') - - prints:: - - # any int or real number, returned as the appropriate type - 100 - [100] - - -100 - [-100] - - +100 - [100] - - 3.14159 - [3.14159] - - 6.02e23 - [6.02e+23] - - 1e-12 - [1e-12] - - # any int or real number, returned as float - 100 - [100.0] - - -100 - [-100.0] - - +100 - [100.0] - - 3.14159 - [3.14159] - - 6.02e23 - [6.02e+23] - - 1e-12 - [1e-12] - - # hex numbers - 100 - [256] - - FF - [255] - - # fractions - 1/2 - [0.5] - - -3/4 - [-0.75] - - # mixed fractions - 1 - [1] - - 1/2 - [0.5] - - -3/4 - [-0.75] - - 1-3/4 - [1.75] - - # uuid - 12345678-1234-5678-1234-567812345678 - [UUID('12345678-1234-5678-1234-567812345678')] - """ - - 
convert_to_integer = token_map(int) - """ - Parse action for converting parsed integers to Python int - """ - - convert_to_float = token_map(float) - """ - Parse action for converting parsed numbers to Python float - """ - - integer = Word(nums).set_name("integer").set_parse_action(convert_to_integer) - """expression that parses an unsigned integer, returns an int""" - - hex_integer = ( - Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16)) - ) - """expression that parses a hexadecimal integer, returns an int""" - - signed_integer = ( - Regex(r"[+-]?\d+") - .set_name("signed integer") - .set_parse_action(convert_to_integer) - ) - """expression that parses an integer with optional leading sign, returns an int""" - - fraction = ( - signed_integer().set_parse_action(convert_to_float) - + "/" - + signed_integer().set_parse_action(convert_to_float) - ).set_name("fraction") - """fractional expression of an integer divided by an integer, returns a float""" - fraction.add_parse_action(lambda tt: tt[0] / tt[-1]) - - mixed_integer = ( - fraction | signed_integer + Opt(Opt("-").suppress() + fraction) - ).set_name("fraction or mixed integer-fraction") - """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" - mixed_integer.add_parse_action(sum) - - real = ( - Regex(r"[+-]?(?:\d+\.\d*|\.\d+)") - .set_name("real number") - .set_parse_action(convert_to_float) - ) - """expression that parses a floating point number and returns a float""" - - sci_real = ( - Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)") - .set_name("real number with scientific notation") - .set_parse_action(convert_to_float) - ) - """expression that parses a floating point number with optional - scientific notation and returns a float""" - - # streamlining this expression makes the docs nicer-looking - number = (sci_real | real | signed_integer).set_name("number").streamline() - """any numeric expression, returns the corresponding Python type""" - - fnumber = ( - Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?") - .set_name("fnumber") - .set_parse_action(convert_to_float) - ) - """any int or real number, returned as float""" - - ieee_float = ( - Regex(r"(?i)[+-]?((\d+\.?\d*(e[+-]?\d+)?)|nan|inf(inity)?)") - .set_name("ieee_float") - .set_parse_action(convert_to_float) - ) - """any floating-point literal (int, real number, infinity, or NaN), returned as float""" - - identifier = Word(identchars, identbodychars).set_name("identifier") - """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" - - ipv4_address = Regex( - r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}" - ).set_name("IPv4 address") - "IPv4 address (``0.0.0.0 - 255.255.255.255``)" - - _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer") - _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name( - "full IPv6 address" - ) - _short_ipv6_address = ( - Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6)) - + "::" - + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6)) - ).set_name("short IPv6 address") - _short_ipv6_address.add_condition( - lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8 - ) - _mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address") - ipv6_address = Combine( - (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name( - "IPv6 address" - ) - ).set_name("IPv6 address") - "IPv6 address (long, short, or mixed form)" - - mac_address = Regex( - 
r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}" - ).set_name("MAC address") - "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" - - @staticmethod - def convert_to_date(fmt: str = "%Y-%m-%d"): - """ - Helper to create a parse action for converting parsed date string to Python datetime.date - - Params - - - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``) - - Example:: - - date_expr = pyparsing_common.iso8601_date.copy() - date_expr.set_parse_action(pyparsing_common.convert_to_date()) - print(date_expr.parse_string("1999-12-31")) - - prints:: - - [datetime.date(1999, 12, 31)] - """ - - def cvt_fn(ss, ll, tt): - try: - return datetime.strptime(tt[0], fmt).date() - except ValueError as ve: - raise ParseException(ss, ll, str(ve)) - - return cvt_fn - - @staticmethod - def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"): - """Helper to create a parse action for converting parsed - datetime string to Python datetime.datetime - - Params - - - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``) - - Example:: - - dt_expr = pyparsing_common.iso8601_datetime.copy() - dt_expr.set_parse_action(pyparsing_common.convert_to_datetime()) - print(dt_expr.parse_string("1999-12-31T23:59:59.999")) - - prints:: - - [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] - """ - - def cvt_fn(s, l, t): - try: - return datetime.strptime(t[0], fmt) - except ValueError as ve: - raise ParseException(s, l, str(ve)) - - return cvt_fn - - iso8601_date = Regex( - r"(?P\d{4})(?:-(?P\d\d)(?:-(?P\d\d))?)?" - ).set_name("ISO8601 date") - "ISO8601 date (``yyyy-mm-dd``)" - - iso8601_datetime = Regex( - r"(?P\d{4})-(?P\d\d)-(?P\d\d)[T ](?P\d\d):(?P\d\d)(:(?P\d\d(\.\d*)?)?)?(?PZ|[+-]\d\d:?\d\d)?" - ).set_name("ISO8601 datetime") - "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``" - - uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID") - "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)" - - _html_stripper = any_open_tag.suppress() | any_close_tag.suppress() - - @staticmethod - def strip_html_tags(s: str, l: int, tokens: ParseResults): - """Parse action to remove HTML tags from web page HTML source - - Example:: - - # strip HTML links from normal text - text = 'More info at the pyparsing wiki page' - td, td_end = make_html_tags("TD") - table_text = td + SkipTo(td_end).set_parse_action(pyparsing_common.strip_html_tags)("body") + td_end - print(table_text.parse_string(text).body) - - Prints:: - - More info at the pyparsing wiki page - """ - return pyparsing_common._html_stripper.transform_string(tokens[0]) - - _commasepitem = ( - Combine( - OneOrMore( - ~Literal(",") - + ~LineEnd() - + Word(printables, exclude_chars=",") - + Opt(White(" \t") + ~FollowedBy(LineEnd() | ",")) - ) - ) - .streamline() - .set_name("commaItem") - ) - comma_separated_list = DelimitedList( - Opt(quoted_string.copy() | _commasepitem, default="") - ).set_name("comma separated list") - """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" - - upcase_tokens = staticmethod(token_map(lambda t: t.upper())) - """Parse action to convert tokens to upper case.""" - - downcase_tokens = staticmethod(token_map(lambda t: t.lower())) - """Parse action to convert tokens to lower case.""" - - # fmt: off - url = Regex( - # https://mathiasbynens.be/demo/url-regex - # https://gist.github.com/dperini/729294 - r"(?P" 
+ - # protocol identifier (optional) - # short syntax // still required - r"(?:(?:(?Phttps?|ftp):)?\/\/)" + - # user:pass BasicAuth (optional) - r"(?:(?P\S+(?::\S*)?)@)?" + - r"(?P" + - # IP address exclusion - # private & local networks - r"(?!(?:10|127)(?:\.\d{1,3}){3})" + - r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" + - r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" + - # IP address dotted notation octets - # excludes loopback network 0.0.0.0 - # excludes reserved space >= 224.0.0.0 - # excludes network & broadcast addresses - # (first & last IP address of each class) - r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" + - r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" + - r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" + - r"|" + - # host & domain names, may end with dot - # can be replaced by a shortest alternative - # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+ - r"(?:" + - r"(?:" + - r"[a-z0-9\u00a1-\uffff]" + - r"[a-z0-9\u00a1-\uffff_-]{0,62}" + - r")?" + - r"[a-z0-9\u00a1-\uffff]\." + - r")+" + - # TLD identifier name, may end with dot - r"(?:[a-z\u00a1-\uffff]{2,}\.?)" + - r")" + - # port number (optional) - r"(:(?P\d{2,5}))?" + - # resource path (optional) - r"(?P\/[^?# ]*)?" + - # query string (optional) - r"(\?(?P[^#]*))?" + - # fragment (optional) - r"(#(?P\S*))?" + - r")" - ).set_name("url") - """URL (http/https/ftp scheme)""" - # fmt: on - - # pre-PEP8 compatibility names - convertToInteger = convert_to_integer - """Deprecated - use :class:`convert_to_integer`""" - convertToFloat = convert_to_float - """Deprecated - use :class:`convert_to_float`""" - convertToDate = convert_to_date - """Deprecated - use :class:`convert_to_date`""" - convertToDatetime = convert_to_datetime - """Deprecated - use :class:`convert_to_datetime`""" - stripHTMLTags = strip_html_tags - """Deprecated - use :class:`strip_html_tags`""" - upcaseTokens = upcase_tokens - """Deprecated - use :class:`upcase_tokens`""" - downcaseTokens = downcase_tokens - """Deprecated - use :class:`downcase_tokens`""" - - -_builtin_exprs = [ - v for v in vars(pyparsing_common).values() if isinstance(v, ParserElement) -] diff --git a/src/pip/_vendor/pyparsing/core.py b/src/pip/_vendor/pyparsing/core.py deleted file mode 100644 index fc403862354..00000000000 --- a/src/pip/_vendor/pyparsing/core.py +++ /dev/null @@ -1,6087 +0,0 @@ -# -# core.py -# - -from collections import deque -import os -import typing -from typing import ( - Any, - Callable, - Generator, - List, - NamedTuple, - Sequence, - Set, - TextIO, - Tuple, - Union, - cast, -) -from abc import ABC, abstractmethod -from enum import Enum -import string -import copy -import warnings -import re -import sys -from collections.abc import Iterable -import traceback -import types -from operator import itemgetter -from functools import wraps -from threading import RLock -from pathlib import Path - -from .util import ( - _FifoCache, - _UnboundedCache, - __config_flags, - _collapse_string_to_ranges, - _escape_regex_range_chars, - _bslash, - _flatten, - LRUMemo as _LRUMemo, - UnboundedMemo as _UnboundedMemo, - replaced_by_pep8, -) -from .exceptions import * -from .actions import * -from .results import ParseResults, _ParseResultsWithOffset -from .unicode import pyparsing_unicode - -_MAX_INT = sys.maxsize -str_type: Tuple[type, ...] = (str, bytes) - -# -# Copyright (c) 2003-2022 Paul T. 
McGuire -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# - - -if sys.version_info >= (3, 8): - from functools import cached_property -else: - - class cached_property: - def __init__(self, func): - self._func = func - - def __get__(self, instance, owner=None): - ret = instance.__dict__[self._func.__name__] = self._func(instance) - return ret - - -class __compat__(__config_flags): - """ - A cross-version compatibility configuration for pyparsing features that will be - released in a future version. By setting values in this configuration to True, - those features can be enabled in prior versions for compatibility development - and testing. - - - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping - of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`; - maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1 - behavior - """ - - _type_desc = "compatibility" - - collect_all_And_tokens = True - - _all_names = [__ for __ in locals() if not __.startswith("_")] - _fixed_names = """ - collect_all_And_tokens - """.split() - - -class __diag__(__config_flags): - _type_desc = "diagnostic" - - warn_multiple_tokens_in_named_alternation = False - warn_ungrouped_named_tokens_in_collection = False - warn_name_set_on_empty_Forward = False - warn_on_parse_using_empty_Forward = False - warn_on_assignment_to_Forward = False - warn_on_multiple_string_args_to_oneof = False - warn_on_match_first_with_lshift_operator = False - enable_debug_on_named_expressions = False - - _all_names = [__ for __ in locals() if not __.startswith("_")] - _warning_names = [name for name in _all_names if name.startswith("warn")] - _debug_names = [name for name in _all_names if name.startswith("enable_debug")] - - @classmethod - def enable_all_warnings(cls) -> None: - for name in cls._warning_names: - cls.enable(name) - - -class Diagnostics(Enum): - """ - Diagnostic configuration (all default to disabled) - - - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results - name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions - - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results - name is defined on a containing expression with ungrouped subexpressions that also - have results names - - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined - with 
a results name, but has no contents defined - - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is - defined in a grammar but has never had an expression attached to it - - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined - but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'`` - - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is - incorrectly called with multiple str arguments - - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent - calls to :class:`ParserElement.set_name` - - Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`. - All warnings can be enabled by calling :class:`enable_all_warnings`. - """ - - warn_multiple_tokens_in_named_alternation = 0 - warn_ungrouped_named_tokens_in_collection = 1 - warn_name_set_on_empty_Forward = 2 - warn_on_parse_using_empty_Forward = 3 - warn_on_assignment_to_Forward = 4 - warn_on_multiple_string_args_to_oneof = 5 - warn_on_match_first_with_lshift_operator = 6 - enable_debug_on_named_expressions = 7 - - -def enable_diag(diag_enum: Diagnostics) -> None: - """ - Enable a global pyparsing diagnostic flag (see :class:`Diagnostics`). - """ - __diag__.enable(diag_enum.name) - - -def disable_diag(diag_enum: Diagnostics) -> None: - """ - Disable a global pyparsing diagnostic flag (see :class:`Diagnostics`). - """ - __diag__.disable(diag_enum.name) - - -def enable_all_warnings() -> None: - """ - Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`). - """ - __diag__.enable_all_warnings() - - -# hide abstract class -del __config_flags - - -def _should_enable_warnings( - cmd_line_warn_options: typing.Iterable[str], warn_env_var: typing.Optional[str] -) -> bool: - enable = bool(warn_env_var) - for warn_opt in cmd_line_warn_options: - w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split( - ":" - )[:5] - if not w_action.lower().startswith("i") and ( - not (w_message or w_category or w_module) or w_module == "pyparsing" - ): - enable = True - elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""): - enable = False - return enable - - -if _should_enable_warnings( - sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS") -): - enable_all_warnings() - - -# build list of single arg builtins, that can be used as parse actions -_single_arg_builtins = { - sum, - len, - sorted, - reversed, - list, - tuple, - set, - any, - all, - min, - max, -} - -_generatorType = types.GeneratorType -ParseImplReturnType = Tuple[int, Any] -PostParseReturnType = Union[ParseResults, Sequence[ParseResults]] -ParseAction = Union[ - Callable[[], Any], - Callable[[ParseResults], Any], - Callable[[int, ParseResults], Any], - Callable[[str, int, ParseResults], Any], -] -ParseCondition = Union[ - Callable[[], bool], - Callable[[ParseResults], bool], - Callable[[int, ParseResults], bool], - Callable[[str, int, ParseResults], bool], -] -ParseFailAction = Callable[[str, int, "ParserElement", Exception], None] -DebugStartAction = Callable[[str, int, "ParserElement", bool], None] -DebugSuccessAction = Callable[ - [str, int, int, "ParserElement", ParseResults, bool], None -] -DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None] - - -alphas = string.ascii_uppercase + string.ascii_lowercase -identchars = pyparsing_unicode.Latin1.identchars -identbodychars = 
pyparsing_unicode.Latin1.identbodychars -nums = "0123456789" -hexnums = nums + "ABCDEFabcdef" -alphanums = alphas + nums -printables = "".join([c for c in string.printable if c not in string.whitespace]) - -_trim_arity_call_line: traceback.StackSummary = None # type: ignore[assignment] - - -def _trim_arity(func, max_limit=3): - """decorator to trim function calls to match the arity of the target""" - global _trim_arity_call_line - - if func in _single_arg_builtins: - return lambda s, l, t: func(t) - - limit = 0 - found_arity = False - - # synthesize what would be returned by traceback.extract_stack at the call to - # user's parse action 'func', so that we don't incur call penalty at parse time - - # fmt: off - LINE_DIFF = 7 - # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND - # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!! - _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1]) - pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF) - - def wrapper(*args): - nonlocal found_arity, limit - while 1: - try: - ret = func(*args[limit:]) - found_arity = True - return ret - except TypeError as te: - # re-raise TypeErrors if they did not come from our arity testing - if found_arity: - raise - else: - tb = te.__traceback__ - frames = traceback.extract_tb(tb, limit=2) - frame_summary = frames[-1] - trim_arity_type_error = ( - [frame_summary[:2]][-1][:2] == pa_call_line_synth - ) - del tb - - if trim_arity_type_error: - if limit < max_limit: - limit += 1 - continue - - raise - # fmt: on - - # copy func name to wrapper for sensible debug output - # (can't use functools.wraps, since that messes with function signature) - func_name = getattr(func, "__name__", getattr(func, "__class__").__name__) - wrapper.__name__ = func_name - wrapper.__doc__ = func.__doc__ - - return wrapper - - -def condition_as_parse_action( - fn: ParseCondition, message: typing.Optional[str] = None, fatal: bool = False -) -> ParseAction: - """ - Function to convert a simple predicate function that returns ``True`` or ``False`` - into a parse action. Can be used in places when a parse action is required - and :class:`ParserElement.add_condition` cannot be used (such as when adding a condition - to an operator level in :class:`infix_notation`). 
- - Optional keyword arguments: - - - ``message`` - define a custom message to be used in the raised exception - - ``fatal`` - if True, will raise :class:`ParseFatalException` to stop parsing immediately; - otherwise will raise :class:`ParseException` - - """ - msg = message if message is not None else "failed user-defined condition" - exc_type = ParseFatalException if fatal else ParseException - fn = _trim_arity(fn) - - @wraps(fn) - def pa(s, l, t): - if not bool(fn(s, l, t)): - raise exc_type(s, l, msg) - - return pa - - -def _default_start_debug_action( - instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False -): - cache_hit_str = "*" if cache_hit else "" - print( - ( - f"{cache_hit_str}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n" - f" {line(loc, instring)}\n" - f" {' ' * (col(loc, instring) - 1)}^" - ) - ) - - -def _default_success_debug_action( - instring: str, - startloc: int, - endloc: int, - expr: "ParserElement", - toks: ParseResults, - cache_hit: bool = False, -): - cache_hit_str = "*" if cache_hit else "" - print(f"{cache_hit_str}Matched {expr} -> {toks.as_list()}") - - -def _default_exception_debug_action( - instring: str, - loc: int, - expr: "ParserElement", - exc: Exception, - cache_hit: bool = False, -): - cache_hit_str = "*" if cache_hit else "" - print(f"{cache_hit_str}Match {expr} failed, {type(exc).__name__} raised: {exc}") - - -def null_debug_action(*args): - """'Do-nothing' debug action, to suppress debugging output during parsing.""" - - -class ParserElement(ABC): - """Abstract base level parser element class.""" - - DEFAULT_WHITE_CHARS: str = " \n\t\r" - verbose_stacktrace: bool = False - _literalStringClass: type = None # type: ignore[assignment] - - @staticmethod - def set_default_whitespace_chars(chars: str) -> None: - r""" - Overrides the default whitespace chars - - Example:: - - # default whitespace chars are space, and newline - Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] - - # change to just treat newline as significant - ParserElement.set_default_whitespace_chars(" \t") - Word(alphas)[1, ...].parse_string("abc def\nghi jkl") # -> ['abc', 'def'] - """ - ParserElement.DEFAULT_WHITE_CHARS = chars - - # update whitespace all parse expressions defined in this module - for expr in _builtin_exprs: - if expr.copyDefaultWhiteChars: - expr.whiteChars = set(chars) - - @staticmethod - def inline_literals_using(cls: type) -> None: - """ - Set class to be used for inclusion of string literals into a parser. - - Example:: - - # default literal class used is Literal - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - date_str.parse_string("1999/12/31") # -> ['1999', '/', '12', '/', '31'] - - - # change to Suppress - ParserElement.inline_literals_using(Suppress) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - date_str.parse_string("1999/12/31") # -> ['1999', '12', '31'] - """ - ParserElement._literalStringClass = cls - - @classmethod - def using_each(cls, seq, **class_kwargs): - """ - Yields a sequence of class(obj, **class_kwargs) for obj in seq. 
- - Example:: - - LPAR, RPAR, LBRACE, RBRACE, SEMI = Suppress.using_each("(){};") - - """ - yield from (cls(obj, **class_kwargs) for obj in seq) - - class DebugActions(NamedTuple): - debug_try: typing.Optional[DebugStartAction] - debug_match: typing.Optional[DebugSuccessAction] - debug_fail: typing.Optional[DebugExceptionAction] - - def __init__(self, savelist: bool = False): - self.parseAction: List[ParseAction] = list() - self.failAction: typing.Optional[ParseFailAction] = None - self.customName: str = None # type: ignore[assignment] - self._defaultName: typing.Optional[str] = None - self.resultsName: str = None # type: ignore[assignment] - self.saveAsList = savelist - self.skipWhitespace = True - self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) - self.copyDefaultWhiteChars = True - # used when checking for left-recursion - self.mayReturnEmpty = False - self.keepTabs = False - self.ignoreExprs: List["ParserElement"] = list() - self.debug = False - self.streamlined = False - # optimize exception handling for subclasses that don't advance parse index - self.mayIndexError = True - self.errmsg = "" - # mark results names as modal (report only last) or cumulative (list all) - self.modalResults = True - # custom debug actions - self.debugActions = self.DebugActions(None, None, None) - # avoid redundant calls to preParse - self.callPreparse = True - self.callDuringTry = False - self.suppress_warnings_: List[Diagnostics] = [] - - def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement": - """ - Suppress warnings emitted for a particular diagnostic on this expression. - - Example:: - - base = pp.Forward() - base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward) - - # statement would normally raise a warning, but is now suppressed - print(base.parse_string("x")) - - """ - self.suppress_warnings_.append(warning_type) - return self - - def visit_all(self): - """General-purpose method to yield all expressions and sub-expressions - in a grammar. Typically just for internal use. - """ - to_visit = deque([self]) - seen = set() - while to_visit: - cur = to_visit.popleft() - - # guard against looping forever through recursive grammars - if cur in seen: - continue - seen.add(cur) - - to_visit.extend(cur.recurse()) - yield cur - - def copy(self) -> "ParserElement": - """ - Make a copy of this :class:`ParserElement`. Useful for defining - different parse actions for the same parsing pattern, using copies of - the original parse element. - - Example:: - - integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) - integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K") - integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") - - print((integerK | integerM | integer)[1, ...].parse_string("5K 100 640K 256M")) - - prints:: - - [5120, 100, 655360, 268435456] - - Equivalent form of ``expr.copy()`` is just ``expr()``:: - - integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") - """ - cpy = copy.copy(self) - cpy.parseAction = self.parseAction[:] - cpy.ignoreExprs = self.ignoreExprs[:] - if self.copyDefaultWhiteChars: - cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) - return cpy - - def set_results_name( - self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False - ) -> "ParserElement": - """ - Define name for referencing matching tokens as a nested attribute - of the returned parse results. 
- - Normally, results names are assigned as you would assign keys in a dict: - any existing value is overwritten by later values. If it is necessary to - keep all values captured for a particular results name, call ``set_results_name`` - with ``list_all_matches`` = True. - - NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object; - this is so that the client can define a basic element, such as an - integer, and reference it in multiple places with different names. - - You can also set results names using the abbreviated syntax, - ``expr("name")`` in place of ``expr.set_results_name("name")`` - - see :class:`__call__`. If ``list_all_matches`` is required, use - ``expr("name*")``. - - Example:: - - integer = Word(nums) - date_str = (integer.set_results_name("year") + '/' - + integer.set_results_name("month") + '/' - + integer.set_results_name("day")) - - # equivalent form: - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - """ - listAllMatches = listAllMatches or list_all_matches - return self._setResultsName(name, listAllMatches) - - def _setResultsName(self, name, listAllMatches=False): - if name is None: - return self - newself = self.copy() - if name.endswith("*"): - name = name[:-1] - listAllMatches = True - newself.resultsName = name - newself.modalResults = not listAllMatches - return newself - - def set_break(self, break_flag: bool = True) -> "ParserElement": - """ - Method to invoke the Python pdb debugger when this element is - about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to - disable. - """ - if break_flag: - _parseMethod = self._parse - - def breaker(instring, loc, doActions=True, callPreParse=True): - import pdb - - # this call to pdb.set_trace() is intentional, not a checkin error - pdb.set_trace() - return _parseMethod(instring, loc, doActions, callPreParse) - - breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined] - self._parse = breaker # type: ignore [assignment] - elif hasattr(self._parse, "_originalParseMethod"): - self._parse = self._parse._originalParseMethod # type: ignore [attr-defined, assignment] - return self - - def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement": - """ - Define one or more actions to perform when successfully matching parse element definition. - - Parse actions can be called to perform data conversions, do extra validation, - update external data structures, or enhance or replace the parsed tokens. - Each parse action ``fn`` is a callable method with 0-3 arguments, called as - ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where: - - - ``s`` = the original string being parsed (see note below) - - ``loc`` = the location of the matching substring - - ``toks`` = a list of the matched tokens, packaged as a :class:`ParseResults` object - - The parsed tokens are passed to the parse action as ParseResults. They can be - modified in place using list-style append, extend, and pop operations to update - the parsed list elements; and with dictionary-style item set and del operations - to add, update, or remove any named results. If the tokens are modified in place, - it is not necessary to return them with a return statement. - - Parse actions can also completely replace the given tokens, with another ``ParseResults`` - object, or with some entirely different object (common for parse actions that perform data - conversions). 
A convenient way to build a new parse result is to define the values - using a dict, and then create the return value using :class:`ParseResults.from_dict`. - - If None is passed as the ``fn`` parse action, all previously added parse actions for this - expression are cleared. - - Optional keyword arguments: - - - ``call_during_try`` = (default= ``False``) indicate if parse action should be run during - lookaheads and alternate testing. For parse actions that have side effects, it is - important to only call the parse action once it is determined that it is being - called as part of a successful parse. For parse actions that perform additional - validation, then call_during_try should be passed as True, so that the validation - code is included in the preliminary "try" parses. - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See :class:`parse_string` for more - information on parsing strings containing ```` s, and suggested - methods to maintain a consistent view of the parsed string, the parse - location, and line and column positions within the parsed string. - - Example:: - - # parse dates in the form YYYY/MM/DD - - # use parse action to convert toks from str to int at parse time - def convert_to_int(toks): - return int(toks[0]) - - # use a parse action to verify that the date is a valid date - def is_valid_date(instring, loc, toks): - from datetime import date - year, month, day = toks[::2] - try: - date(year, month, day) - except ValueError: - raise ParseException(instring, loc, "invalid date given") - - integer = Word(nums) - date_str = integer + '/' + integer + '/' + integer - - # add parse actions - integer.set_parse_action(convert_to_int) - date_str.set_parse_action(is_valid_date) - - # note that integer fields are now ints, not strings - date_str.run_tests(''' - # successful parse - note that integer fields were converted to ints - 1999/12/31 - - # fail - invalid date - 1999/13/31 - ''') - """ - if list(fns) == [None]: - self.parseAction = [] - return self - - if not all(callable(fn) for fn in fns): - raise TypeError("parse actions must be callable") - self.parseAction = [_trim_arity(fn) for fn in fns] - self.callDuringTry = kwargs.get( - "call_during_try", kwargs.get("callDuringTry", False) - ) - - return self - - def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement": - """ - Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`. - - See examples in :class:`copy`. - """ - self.parseAction += [_trim_arity(fn) for fn in fns] - self.callDuringTry = self.callDuringTry or kwargs.get( - "call_during_try", kwargs.get("callDuringTry", False) - ) - return self - - def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement": - """Add a boolean predicate function to expression's list of parse actions. See - :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``, - functions passed to ``add_condition`` need to return boolean success/fail of the condition. 
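The set_parse_action docs above note that parse actions may modify the token list in place and need not return anything. A minimal sketch of that pattern, assuming a standalone ``pyparsing`` install rather than the vendored copy this diff deletes; ``upcase`` is a hypothetical action name::

    import pyparsing as pp

    word = pp.Word(pp.alphas)

    def upcase(toks):
        # edit the ParseResults in place; no return statement needed
        toks[0] = toks[0].upper()

    word.add_parse_action(upcase)
    print(word[1, ...].parse_string("ab cd"))   # -> ['AB', 'CD']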
- - Optional keyword arguments: - - - ``message`` = define a custom message to be used in the raised exception - - ``fatal`` = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise - ParseException - - ``call_during_try`` = boolean to indicate if this method should be called during internal tryParse calls, - default=False - - Example:: - - integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) - year_int = integer.copy() - year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later") - date_str = year_int + '/' + integer + '/' + integer - - result = date_str.parse_string("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), - (line:1, col:1) - """ - for fn in fns: - self.parseAction.append( - condition_as_parse_action( - fn, - message=str(kwargs.get("message")), - fatal=bool(kwargs.get("fatal", False)), - ) - ) - - self.callDuringTry = self.callDuringTry or kwargs.get( - "call_during_try", kwargs.get("callDuringTry", False) - ) - return self - - def set_fail_action(self, fn: ParseFailAction) -> "ParserElement": - """ - Define action to perform if parsing fails at this expression. - Fail acton fn is a callable function that takes the arguments - ``fn(s, loc, expr, err)`` where: - - - ``s`` = string being parsed - - ``loc`` = location where expression match was attempted and failed - - ``expr`` = the parse expression that failed - - ``err`` = the exception thrown - - The function returns no value. It may throw :class:`ParseFatalException` - if it is desired to stop parsing immediately.""" - self.failAction = fn - return self - - def _skipIgnorables(self, instring: str, loc: int) -> int: - if not self.ignoreExprs: - return loc - exprsFound = True - ignore_expr_fns = [e._parse for e in self.ignoreExprs] - last_loc = loc - while exprsFound: - exprsFound = False - for ignore_fn in ignore_expr_fns: - try: - while 1: - loc, dummy = ignore_fn(instring, loc) - exprsFound = True - except ParseException: - pass - # check if all ignore exprs matched but didn't actually advance the parse location - if loc == last_loc: - break - last_loc = loc - return loc - - def preParse(self, instring: str, loc: int) -> int: - if self.ignoreExprs: - loc = self._skipIgnorables(instring, loc) - - if self.skipWhitespace: - instrlen = len(instring) - white_chars = self.whiteChars - while loc < instrlen and instring[loc] in white_chars: - loc += 1 - - return loc - - def parseImpl(self, instring, loc, doActions=True): - return loc, [] - - def postParse(self, instring, loc, tokenlist): - return tokenlist - - # @profile - def _parseNoCache( - self, instring, loc, doActions=True, callPreParse=True - ) -> Tuple[int, ParseResults]: - TRY, MATCH, FAIL = 0, 1, 2 - debugging = self.debug # and doActions) - len_instring = len(instring) - - if debugging or self.failAction: - # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring))) - try: - if callPreParse and self.callPreparse: - pre_loc = self.preParse(instring, loc) - else: - pre_loc = loc - tokens_start = pre_loc - if self.debugActions.debug_try: - self.debugActions.debug_try(instring, tokens_start, self, False) - if self.mayIndexError or pre_loc >= len_instring: - try: - loc, tokens = self.parseImpl(instring, pre_loc, doActions) - except IndexError: - raise ParseException(instring, len_instring, self.errmsg, self) - else: - loc, tokens = self.parseImpl(instring, pre_loc, doActions) - except Exception as err: - # print("Exception 
raised:", err) - if self.debugActions.debug_fail: - self.debugActions.debug_fail( - instring, tokens_start, self, err, False - ) - if self.failAction: - self.failAction(instring, tokens_start, self, err) - raise - else: - if callPreParse and self.callPreparse: - pre_loc = self.preParse(instring, loc) - else: - pre_loc = loc - tokens_start = pre_loc - if self.mayIndexError or pre_loc >= len_instring: - try: - loc, tokens = self.parseImpl(instring, pre_loc, doActions) - except IndexError: - raise ParseException(instring, len_instring, self.errmsg, self) - else: - loc, tokens = self.parseImpl(instring, pre_loc, doActions) - - tokens = self.postParse(instring, loc, tokens) - - ret_tokens = ParseResults( - tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults - ) - if self.parseAction and (doActions or self.callDuringTry): - if debugging: - try: - for fn in self.parseAction: - try: - tokens = fn(instring, tokens_start, ret_tokens) # type: ignore [call-arg, arg-type] - except IndexError as parse_action_exc: - exc = ParseException("exception raised in parse action") - raise exc from parse_action_exc - - if tokens is not None and tokens is not ret_tokens: - ret_tokens = ParseResults( - tokens, - self.resultsName, - asList=self.saveAsList - and isinstance(tokens, (ParseResults, list)), - modal=self.modalResults, - ) - except Exception as err: - # print "Exception raised in user parse action:", err - if self.debugActions.debug_fail: - self.debugActions.debug_fail( - instring, tokens_start, self, err, False - ) - raise - else: - for fn in self.parseAction: - try: - tokens = fn(instring, tokens_start, ret_tokens) # type: ignore [call-arg, arg-type] - except IndexError as parse_action_exc: - exc = ParseException("exception raised in parse action") - raise exc from parse_action_exc - - if tokens is not None and tokens is not ret_tokens: - ret_tokens = ParseResults( - tokens, - self.resultsName, - asList=self.saveAsList - and isinstance(tokens, (ParseResults, list)), - modal=self.modalResults, - ) - if debugging: - # print("Matched", self, "->", ret_tokens.as_list()) - if self.debugActions.debug_match: - self.debugActions.debug_match( - instring, tokens_start, loc, self, ret_tokens, False - ) - - return loc, ret_tokens - - def try_parse( - self, - instring: str, - loc: int, - *, - raise_fatal: bool = False, - do_actions: bool = False, - ) -> int: - try: - return self._parse(instring, loc, doActions=do_actions)[0] - except ParseFatalException: - if raise_fatal: - raise - raise ParseException(instring, loc, self.errmsg, self) - - def can_parse_next(self, instring: str, loc: int, do_actions: bool = False) -> bool: - try: - self.try_parse(instring, loc, do_actions=do_actions) - except (ParseException, IndexError): - return False - else: - return True - - # cache for left-recursion in Forward references - recursion_lock = RLock() - recursion_memos: typing.Dict[ - Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]] - ] = {} - - class _CacheType(dict): - """ - class to help type checking - """ - - not_in_cache: bool - - def get(self, *args): ... - - def set(self, *args): ... 
- - # argument cache for optimizing repeated calls when backtracking through recursive expressions - packrat_cache = ( - _CacheType() - ) # set later by enable_packrat(); this is here so that reset_cache() doesn't fail - packrat_cache_lock = RLock() - packrat_cache_stats = [0, 0] - - # this method gets repeatedly called during backtracking with the same arguments - - # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression - def _parseCache( - self, instring, loc, doActions=True, callPreParse=True - ) -> Tuple[int, ParseResults]: - HIT, MISS = 0, 1 - TRY, MATCH, FAIL = 0, 1, 2 - lookup = (self, instring, loc, callPreParse, doActions) - with ParserElement.packrat_cache_lock: - cache = ParserElement.packrat_cache - value = cache.get(lookup) - if value is cache.not_in_cache: - ParserElement.packrat_cache_stats[MISS] += 1 - try: - value = self._parseNoCache(instring, loc, doActions, callPreParse) - except ParseBaseException as pe: - # cache a copy of the exception, without the traceback - cache.set(lookup, pe.__class__(*pe.args)) - raise - else: - cache.set(lookup, (value[0], value[1].copy(), loc)) - return value - else: - ParserElement.packrat_cache_stats[HIT] += 1 - if self.debug and self.debugActions.debug_try: - try: - self.debugActions.debug_try(instring, loc, self, cache_hit=True) # type: ignore [call-arg] - except TypeError: - pass - if isinstance(value, Exception): - if self.debug and self.debugActions.debug_fail: - try: - self.debugActions.debug_fail( - instring, loc, self, value, cache_hit=True # type: ignore [call-arg] - ) - except TypeError: - pass - raise value - - value = cast(Tuple[int, ParseResults, int], value) - loc_, result, endloc = value[0], value[1].copy(), value[2] - if self.debug and self.debugActions.debug_match: - try: - self.debugActions.debug_match( - instring, loc_, endloc, self, result, cache_hit=True # type: ignore [call-arg] - ) - except TypeError: - pass - - return loc_, result - - _parse = _parseNoCache - - @staticmethod - def reset_cache() -> None: - ParserElement.packrat_cache.clear() - ParserElement.packrat_cache_stats[:] = [0] * len( - ParserElement.packrat_cache_stats - ) - ParserElement.recursion_memos.clear() - - _packratEnabled = False - _left_recursion_enabled = False - - @staticmethod - def disable_memoization() -> None: - """ - Disables active Packrat or Left Recursion parsing and their memoization - - This method also works if neither Packrat nor Left Recursion are enabled. - This makes it safe to call before activating Packrat nor Left Recursion - to clear any previous settings. - """ - ParserElement.reset_cache() - ParserElement._left_recursion_enabled = False - ParserElement._packratEnabled = False - ParserElement._parse = ParserElement._parseNoCache - - @staticmethod - def enable_left_recursion( - cache_size_limit: typing.Optional[int] = None, *, force=False - ) -> None: - """ - Enables "bounded recursion" parsing, which allows for both direct and indirect - left-recursion. During parsing, left-recursive :class:`Forward` elements are - repeatedly matched with a fixed recursion depth that is gradually increased - until finding the longest match. - - Example:: - - from pip._vendor import pyparsing as pp - pp.ParserElement.enable_left_recursion() - - E = pp.Forward("E") - num = pp.Word(pp.nums) - # match `num`, or `num '+' num`, or `num '+' num '+' num`, ... 
- E <<= E + '+' - num | num - - print(E.parse_string("1+2+3")) - - Recursion search naturally memoizes matches of ``Forward`` elements and may - thus skip reevaluation of parse actions during backtracking. This may break - programs with parse actions which rely on strict ordering of side-effects. - - Parameters: - - - ``cache_size_limit`` - (default=``None``) - memoize at most this many - ``Forward`` elements during matching; if ``None`` (the default), - memoize all ``Forward`` elements. - - Bounded Recursion parsing works similar but not identical to Packrat parsing, - thus the two cannot be used together. Use ``force=True`` to disable any - previous, conflicting settings. - """ - if force: - ParserElement.disable_memoization() - elif ParserElement._packratEnabled: - raise RuntimeError("Packrat and Bounded Recursion are not compatible") - if cache_size_limit is None: - ParserElement.recursion_memos = _UnboundedMemo() # type: ignore[assignment] - elif cache_size_limit > 0: - ParserElement.recursion_memos = _LRUMemo(capacity=cache_size_limit) # type: ignore[assignment] - else: - raise NotImplementedError(f"Memo size of {cache_size_limit}") - ParserElement._left_recursion_enabled = True - - @staticmethod - def enable_packrat( - cache_size_limit: Union[int, None] = 128, *, force: bool = False - ) -> None: - """ - Enables "packrat" parsing, which adds memoizing to the parsing logic. - Repeated parse attempts at the same string location (which happens - often in many complex grammars) can immediately return a cached value, - instead of re-executing parsing/validating code. Memoizing is done of - both valid results and parsing exceptions. - - Parameters: - - - ``cache_size_limit`` - (default= ``128``) - if an integer value is provided - will limit the size of the packrat cache; if None is passed, then - the cache size will be unbounded; if 0 is passed, the cache will - be effectively disabled. - - This speedup may break existing programs that use parse actions that - have side-effects. For this reason, packrat parsing is disabled when - you first import pyparsing. To activate the packrat feature, your - program must call the class method :class:`ParserElement.enable_packrat`. - For best results, call ``enable_packrat()`` immediately after - importing pyparsing. - - Example:: - - from pip._vendor import pyparsing - pyparsing.ParserElement.enable_packrat() - - Packrat parsing works similar but not identical to Bounded Recursion parsing, - thus the two cannot be used together. Use ``force=True`` to disable any - previous, conflicting settings. - """ - if force: - ParserElement.disable_memoization() - elif ParserElement._left_recursion_enabled: - raise RuntimeError("Packrat and Bounded Recursion are not compatible") - - if ParserElement._packratEnabled: - return - - ParserElement._packratEnabled = True - if cache_size_limit is None: - ParserElement.packrat_cache = _UnboundedCache() - else: - ParserElement.packrat_cache = _FifoCache(cache_size_limit) # type: ignore[assignment] - ParserElement._parse = ParserElement._parseCache - - def parse_string( - self, instring: str, parse_all: bool = False, *, parseAll: bool = False - ) -> ParseResults: - """ - Parse a string with respect to the parser definition. This function is intended as the primary interface to the - client code. - - :param instring: The input string to be parsed. - :param parse_all: If set, the entire input string must match the grammar. - :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release. 
- :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar. - :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or - an object with attributes if the given parser includes results names. - - If the input string is required to match the entire grammar, ``parse_all`` flag must be set to ``True``. This - is also equivalent to ending the grammar with :class:`StringEnd`\\ (). - - To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are - converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string - contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string - being parsed, one can ensure a consistent view of the input string by doing one of the following: - - - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`), - - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the - parse action's ``s`` argument, or - - explicitly expand the tabs in your input string before calling ``parse_string``. - - Examples: - - By default, partial matches are OK. - - >>> res = Word('a').parse_string('aaaaabaaa') - >>> print(res) - ['aaaaa'] - - The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children - directly to see more examples. - - It raises an exception if parse_all flag is set and instring does not match the whole grammar. - - >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True) - Traceback (most recent call last): - ... - pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6) - """ - parseAll = parse_all or parseAll - - ParserElement.reset_cache() - if not self.streamlined: - self.streamline() - for e in self.ignoreExprs: - e.streamline() - if not self.keepTabs: - instring = instring.expandtabs() - try: - loc, tokens = self._parse(instring, 0) - if parseAll: - loc = self.preParse(instring, loc) - se = Empty() + StringEnd() - se._parse(instring, loc) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - else: - # catch and re-raise exception from here, clearing out pyparsing internal stack trace - raise exc.with_traceback(None) - else: - return tokens - - def scan_string( - self, - instring: str, - max_matches: int = _MAX_INT, - overlap: bool = False, - *, - debug: bool = False, - maxMatches: int = _MAX_INT, - ) -> Generator[Tuple[ParseResults, int, int], None, None]: - """ - Scan the input string for expression matches. Each match will return the - matching tokens, start location, and end location. May be called with optional - ``max_matches`` argument, to clip scanning after 'n' matches are found. If - ``overlap`` is specified, then overlapping matches will be reported. - - Note that the start and end locations are reported relative to the string - being parsed. See :class:`parse_string` for more information on parsing - strings with embedded tabs. 
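The tab-handling caveat above is easiest to see with the locations reported by ``scan_string``. A sketch assuming the default 8-column expansion performed by ``str.expandtabs`` (standalone ``pyparsing`` install)::

    import pyparsing as pp

    plain = pp.Literal("X")
    raw = pp.Literal("X").parse_with_tabs()

    # default: the input is tab-expanded first, so 'X' is reported at offset 8
    print(next(plain.scan_string("\tX"))[1])   # -> 8
    # parse_with_tabs(): locations index into the original, unexpanded string
    print(next(raw.scan_string("\tX"))[1])     # -> 1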
- - Example:: - - source = "sldjf123lsdjjkf345sldkjf879lkjsfd987" - print(source) - for tokens, start, end in Word(alphas).scan_string(source): - print(' '*start + '^'*(end-start)) - print(' '*start + tokens[0]) - - prints:: - - sldjf123lsdjjkf345sldkjf879lkjsfd987 - ^^^^^ - sldjf - ^^^^^^^ - lsdjjkf - ^^^^^^ - sldkjf - ^^^^^^ - lkjsfd - """ - maxMatches = min(maxMatches, max_matches) - if not self.streamlined: - self.streamline() - for e in self.ignoreExprs: - e.streamline() - - if not self.keepTabs: - instring = str(instring).expandtabs() - instrlen = len(instring) - loc = 0 - preparseFn = self.preParse - parseFn = self._parse - ParserElement.resetCache() - matches = 0 - try: - while loc <= instrlen and matches < maxMatches: - try: - preloc: int = preparseFn(instring, loc) - nextLoc: int - tokens: ParseResults - nextLoc, tokens = parseFn(instring, preloc, callPreParse=False) - except ParseException: - loc = preloc + 1 - else: - if nextLoc > loc: - matches += 1 - if debug: - print( - { - "tokens": tokens.asList(), - "start": preloc, - "end": nextLoc, - } - ) - yield tokens, preloc, nextLoc - if overlap: - nextloc = preparseFn(instring, loc) - if nextloc > loc: - loc = nextLoc - else: - loc += 1 - else: - loc = nextLoc - else: - loc = preloc + 1 - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc.with_traceback(None) - - def transform_string(self, instring: str, *, debug: bool = False) -> str: - """ - Extension to :class:`scan_string`, to modify matching text with modified tokens that may - be returned from a parse action. To use ``transform_string``, define a grammar and - attach a parse action to it that modifies the returned token list. - Invoking ``transform_string()`` on a target string will then scan for matches, - and replace the matched text patterns according to the logic in the parse - action. ``transform_string()`` returns the resulting transformed string. - - Example:: - - wd = Word(alphas) - wd.set_parse_action(lambda toks: toks[0].title()) - - print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york.")) - - prints:: - - Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York. - """ - out: List[str] = [] - lastE = 0 - # force preservation of s, to minimize unwanted transformation of string, and to - # keep string locs straight between transform_string and scan_string - self.keepTabs = True - try: - for t, s, e in self.scan_string(instring, debug=debug): - out.append(instring[lastE:s]) - lastE = e - - if not t: - continue - - if isinstance(t, ParseResults): - out += t.as_list() - elif isinstance(t, Iterable) and not isinstance(t, str_type): - out.extend(t) - else: - out.append(t) - - out.append(instring[lastE:]) - out = [o for o in out if o] - return "".join([str(s) for s in _flatten(out)]) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc.with_traceback(None) - - def search_string( - self, - instring: str, - max_matches: int = _MAX_INT, - *, - debug: bool = False, - maxMatches: int = _MAX_INT, - ) -> ParseResults: - """ - Another extension to :class:`scan_string`, simplifying the access to the tokens found - to match the given parse expression. May be called with optional - ``max_matches`` argument, to clip searching after 'n' matches are found. 
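Complementing the title-casing example above, ``transform_string`` pairs naturally with ``replace_with`` for search-and-replace rewriting. A small sketch; the ``<NUM>`` placeholder is arbitrary::

    import pyparsing as pp

    number = pp.Word(pp.nums).set_parse_action(pp.replace_with("<NUM>"))
    print(number.transform_string("order 66 shipped to box 221"))
    # -> 'order <NUM> shipped to box <NUM>'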
- - Example:: - - # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters - cap_word = Word(alphas.upper(), alphas.lower()) - - print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")) - - # the sum() builtin can be used to merge results into a single ParseResults object - print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))) - - prints:: - - [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']] - ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity'] - """ - maxMatches = min(maxMatches, max_matches) - try: - return ParseResults( - [t for t, s, e in self.scan_string(instring, maxMatches, debug=debug)] - ) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc.with_traceback(None) - - def split( - self, - instring: str, - maxsplit: int = _MAX_INT, - include_separators: bool = False, - *, - includeSeparators=False, - ) -> Generator[str, None, None]: - """ - Generator method to split a string using the given expression as a separator. - May be called with optional ``maxsplit`` argument, to limit the number of splits; - and the optional ``include_separators`` argument (default= ``False``), if the separating - matching text should be included in the split results. - - Example:: - - punc = one_of(list(".,;:/-!?")) - print(list(punc.split("This, this?, this sentence, is badly punctuated!"))) - - prints:: - - ['This', ' this', '', ' this sentence', ' is badly punctuated', ''] - """ - includeSeparators = includeSeparators or include_separators - last = 0 - for t, s, e in self.scan_string(instring, max_matches=maxsplit): - yield instring[last:s] - if includeSeparators: - yield t[0] - last = e - yield instring[last:] - - def __add__(self, other) -> "ParserElement": - """ - Implementation of ``+`` operator - returns :class:`And`. Adding strings to a :class:`ParserElement` - converts them to :class:`Literal`\\ s by default. - - Example:: - - greet = Word(alphas) + "," + Word(alphas) + "!" - hello = "Hello, World!" - print(hello, "->", greet.parse_string(hello)) - - prints:: - - Hello, World! -> ['Hello', ',', 'World', '!'] - - ``...`` may be used as a parse expression as a short form of :class:`SkipTo`:: - - Literal('start') + ... + Literal('end') - - is equivalent to:: - - Literal('start') + SkipTo('end')("_skipped*") + Literal('end') - - Note that the skipped text is returned with '_skipped' as a results name, - and to support having multiple skips in the same parser, the value returned is - a list of all skipped text. 
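A short sketch of the ``...`` shorthand just described; the skipped text (including surrounding whitespace) comes back under the ``_skipped`` results name (standalone ``pyparsing`` assumed)::

    import pyparsing as pp

    expr = pp.Literal("start") + ... + pp.Literal("end")
    result = expr.parse_string("start one two end")
    print(result)            # roughly ['start', 'one two ', 'end']
    print(result._skipped)   # the skipped span, kept as a list to allow multiple skips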
- """ - if other is Ellipsis: - return _PendingSkip(self) - - if isinstance(other, str_type): - other = self._literalStringClass(other) - if not isinstance(other, ParserElement): - return NotImplemented - return And([self, other]) - - def __radd__(self, other) -> "ParserElement": - """ - Implementation of ``+`` operator when left operand is not a :class:`ParserElement` - """ - if other is Ellipsis: - return SkipTo(self)("_skipped*") + self - - if isinstance(other, str_type): - other = self._literalStringClass(other) - if not isinstance(other, ParserElement): - return NotImplemented - return other + self - - def __sub__(self, other) -> "ParserElement": - """ - Implementation of ``-`` operator, returns :class:`And` with error stop - """ - if isinstance(other, str_type): - other = self._literalStringClass(other) - if not isinstance(other, ParserElement): - return NotImplemented - return self + And._ErrorStop() + other - - def __rsub__(self, other) -> "ParserElement": - """ - Implementation of ``-`` operator when left operand is not a :class:`ParserElement` - """ - if isinstance(other, str_type): - other = self._literalStringClass(other) - if not isinstance(other, ParserElement): - return NotImplemented - return other - self - - def __mul__(self, other) -> "ParserElement": - """ - Implementation of ``*`` operator, allows use of ``expr * 3`` in place of - ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer - tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples - may also include ``None`` as in: - - - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent - to ``expr*n + ZeroOrMore(expr)`` - (read as "at least n instances of ``expr``") - - ``expr*(None, n)`` is equivalent to ``expr*(0, n)`` - (read as "0 to n instances of ``expr``") - - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)`` - - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)`` - - Note that ``expr*(None, n)`` does not raise an exception if - more than n exprs exist in the input stream; that is, - ``expr*(None, n)`` does not enforce a maximum number of expr - occurrences. 
If this behavior is desired, then write - ``expr*(None, n) + ~expr`` - """ - if other is Ellipsis: - other = (0, None) - elif isinstance(other, tuple) and other[:1] == (Ellipsis,): - other = ((0,) + other[1:] + (None,))[:2] - - if not isinstance(other, (int, tuple)): - return NotImplemented - - if isinstance(other, int): - minElements, optElements = other, 0 - else: - other = tuple(o if o is not Ellipsis else None for o in other) - other = (other + (None, None))[:2] - if other[0] is None: - other = (0, other[1]) - if isinstance(other[0], int) and other[1] is None: - if other[0] == 0: - return ZeroOrMore(self) - if other[0] == 1: - return OneOrMore(self) - else: - return self * other[0] + ZeroOrMore(self) - elif isinstance(other[0], int) and isinstance(other[1], int): - minElements, optElements = other - optElements -= minElements - else: - return NotImplemented - - if minElements < 0: - raise ValueError("cannot multiply ParserElement by negative value") - if optElements < 0: - raise ValueError( - "second tuple value must be greater or equal to first tuple value" - ) - if minElements == optElements == 0: - return And([]) - - if optElements: - - def makeOptionalList(n): - if n > 1: - return Opt(self + makeOptionalList(n - 1)) - else: - return Opt(self) - - if minElements: - if minElements == 1: - ret = self + makeOptionalList(optElements) - else: - ret = And([self] * minElements) + makeOptionalList(optElements) - else: - ret = makeOptionalList(optElements) - else: - if minElements == 1: - ret = self - else: - ret = And([self] * minElements) - return ret - - def __rmul__(self, other) -> "ParserElement": - return self.__mul__(other) - - def __or__(self, other) -> "ParserElement": - """ - Implementation of ``|`` operator - returns :class:`MatchFirst` - """ - if other is Ellipsis: - return _PendingSkip(self, must_skip=True) - - if isinstance(other, str_type): - # `expr | ""` is equivalent to `Opt(expr)` - if other == "": - return Opt(self) - other = self._literalStringClass(other) - if not isinstance(other, ParserElement): - return NotImplemented - return MatchFirst([self, other]) - - def __ror__(self, other) -> "ParserElement": - """ - Implementation of ``|`` operator when left operand is not a :class:`ParserElement` - """ - if isinstance(other, str_type): - other = self._literalStringClass(other) - if not isinstance(other, ParserElement): - return NotImplemented - return other | self - - def __xor__(self, other) -> "ParserElement": - """ - Implementation of ``^`` operator - returns :class:`Or` - """ - if isinstance(other, str_type): - other = self._literalStringClass(other) - if not isinstance(other, ParserElement): - return NotImplemented - return Or([self, other]) - - def __rxor__(self, other) -> "ParserElement": - """ - Implementation of ``^`` operator when left operand is not a :class:`ParserElement` - """ - if isinstance(other, str_type): - other = self._literalStringClass(other) - if not isinstance(other, ParserElement): - return NotImplemented - return other ^ self - - def __and__(self, other) -> "ParserElement": - """ - Implementation of ``&`` operator - returns :class:`Each` - """ - if isinstance(other, str_type): - other = self._literalStringClass(other) - if not isinstance(other, ParserElement): - return NotImplemented - return Each([self, other]) - - def __rand__(self, other) -> "ParserElement": - """ - Implementation of ``&`` operator when left operand is not a :class:`ParserElement` - """ - if isinstance(other, str_type): - other = self._literalStringClass(other) - if not 
isinstance(other, ParserElement): - return NotImplemented - return other & self - - def __invert__(self) -> "ParserElement": - """ - Implementation of ``~`` operator - returns :class:`NotAny` - """ - return NotAny(self) - - # disable __iter__ to override legacy use of sequential access to __getitem__ to - # iterate over a sequence - __iter__ = None - - def __getitem__(self, key): - """ - use ``[]`` indexing notation as a short form for expression repetition: - - - ``expr[n]`` is equivalent to ``expr*n`` - - ``expr[m, n]`` is equivalent to ``expr*(m, n)`` - - ``expr[n, ...]`` or ``expr[n,]`` is equivalent - to ``expr*n + ZeroOrMore(expr)`` - (read as "at least n instances of ``expr``") - - ``expr[..., n]`` is equivalent to ``expr*(0, n)`` - (read as "0 to n instances of ``expr``") - - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)`` - - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)`` - - ``None`` may be used in place of ``...``. - - Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception - if more than ``n`` ``expr``\\ s exist in the input stream. If this behavior is - desired, then write ``expr[..., n] + ~expr``. - - For repetition with a stop_on expression, use slice notation: - - - ``expr[...: end_expr]`` and ``expr[0, ...: end_expr]`` are equivalent to ``ZeroOrMore(expr, stop_on=end_expr)`` - - ``expr[1, ...: end_expr]`` is equivalent to ``OneOrMore(expr, stop_on=end_expr)`` - - """ - - stop_on_defined = False - stop_on = NoMatch() - if isinstance(key, slice): - key, stop_on = key.start, key.stop - if key is None: - key = ... - stop_on_defined = True - elif isinstance(key, tuple) and isinstance(key[-1], slice): - key, stop_on = (key[0], key[1].start), key[1].stop - stop_on_defined = True - - # convert single arg keys to tuples - if isinstance(key, str_type): - key = (key,) - try: - iter(key) - except TypeError: - key = (key, key) - - if len(key) > 2: - raise TypeError( - f"only 1 or 2 index arguments supported ({key[:5]}{f'... [{len(key)}]' if len(key) > 5 else ''})" - ) - - # clip to 2 elements - ret = self * tuple(key[:2]) - ret = typing.cast(_MultipleMatch, ret) - - if stop_on_defined: - ret.stopOn(stop_on) - - return ret - - def __call__(self, name: typing.Optional[str] = None) -> "ParserElement": - """ - Shortcut for :class:`set_results_name`, with ``list_all_matches=False``. - - If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be - passed as ``True``. - - If ``name`` is omitted, same as calling :class:`copy`. - - Example:: - - # these are equivalent - userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno") - userdata = Word(alphas)("name") + Word(nums + "-")("socsecno") - """ - if name is not None: - return self._setResultsName(name) - - return self.copy() - - def suppress(self) -> "ParserElement": - """ - Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from - cluttering up returned output. - """ - return Suppress(self) - - def ignore_whitespace(self, recursive: bool = True) -> "ParserElement": - """ - Enables the skipping of whitespace before matching the characters in the - :class:`ParserElement`'s defined pattern. 
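The ``[]`` repetition forms described above map directly onto ``ZeroOrMore``/``OneOrMore``; a quick sketch, including the slice-based ``stop_on`` form::

    import pyparsing as pp

    word = pp.Word(pp.alphas)

    print(word[2].parse_string("ab cd"))          # exactly two -> ['ab', 'cd']
    print(word[1, ...].parse_string("ab cd ef"))  # one or more -> ['ab', 'cd', 'ef']
    print(word[...].parse_string(""))             # zero or more -> []
    print(word[...: pp.Keyword("end")].parse_string("ab cd end"))  # stops before 'end' -> ['ab', 'cd']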
- - :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any) - """ - self.skipWhitespace = True - return self - - def leave_whitespace(self, recursive: bool = True) -> "ParserElement": - """ - Disables the skipping of whitespace before matching the characters in the - :class:`ParserElement`'s defined pattern. This is normally only used internally by - the pyparsing module, but may be needed in some whitespace-sensitive grammars. - - :param recursive: If true (the default), also disable whitespace skipping in child elements (if any) - """ - self.skipWhitespace = False - return self - - def set_whitespace_chars( - self, chars: Union[Set[str], str], copy_defaults: bool = False - ) -> "ParserElement": - """ - Overrides the default whitespace chars - """ - self.skipWhitespace = True - self.whiteChars = set(chars) - self.copyDefaultWhiteChars = copy_defaults - return self - - def parse_with_tabs(self) -> "ParserElement": - """ - Overrides default behavior to expand ```` s to spaces before parsing the input string. - Must be called before ``parse_string`` when the input grammar contains elements that - match ```` characters. - """ - self.keepTabs = True - return self - - def ignore(self, other: "ParserElement") -> "ParserElement": - """ - Define expression to be ignored (e.g., comments) while doing pattern - matching; may be called repeatedly, to define multiple comment or other - ignorable patterns. - - Example:: - - patt = Word(alphas)[...] - patt.parse_string('ablaj /* comment */ lskjd') - # -> ['ablaj'] - - patt.ignore(c_style_comment) - patt.parse_string('ablaj /* comment */ lskjd') - # -> ['ablaj', 'lskjd'] - """ - if isinstance(other, str_type): - other = Suppress(other) - - if isinstance(other, Suppress): - if other not in self.ignoreExprs: - self.ignoreExprs.append(other) - else: - self.ignoreExprs.append(Suppress(other.copy())) - return self - - def set_debug_actions( - self, - start_action: DebugStartAction, - success_action: DebugSuccessAction, - exception_action: DebugExceptionAction, - ) -> "ParserElement": - """ - Customize display of debugging messages while doing pattern matching: - - - ``start_action`` - method to be called when an expression is about to be parsed; - should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)`` - - - ``success_action`` - method to be called when an expression has successfully parsed; - should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)`` - - - ``exception_action`` - method to be called when expression fails to parse; - should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)`` - """ - self.debugActions = self.DebugActions( - start_action or _default_start_debug_action, # type: ignore[truthy-function] - success_action or _default_success_debug_action, # type: ignore[truthy-function] - exception_action or _default_exception_debug_action, # type: ignore[truthy-function] - ) - self.debug = True - return self - - def set_debug(self, flag: bool = True, recurse: bool = False) -> "ParserElement": - """ - Enable display of debugging messages while doing pattern matching. - Set ``flag`` to ``True`` to enable, ``False`` to disable. - Set ``recurse`` to ``True`` to set the debug flag on this expression and all sub-expressions. 
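``set_whitespace_chars`` (above) is what makes newline-sensitive grammars practical: restricting skippable whitespace to spaces and tabs lets ``LineEnd`` act as an explicit terminator. A hedged sketch under the same standalone-install assumption as earlier::

    import pyparsing as pp

    # treat only spaces and tabs as skippable, so '\n' must be matched explicitly
    item = pp.Word(pp.alphas).set_whitespace_chars(" \t")
    line = item[1, ...] + pp.LineEnd().suppress()

    print(line.parse_string("ab cd\n"))   # -> ['ab', 'cd']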
- - Example:: - - wd = Word(alphas).set_name("alphaword") - integer = Word(nums).set_name("numword") - term = wd | integer - - # turn on debugging for wd - wd.set_debug() - - term[1, ...].parse_string("abc 123 xyz 890") - - prints:: - - Match alphaword at loc 0(1,1) - Matched alphaword -> ['abc'] - Match alphaword at loc 3(1,4) - Exception raised:Expected alphaword (at char 4), (line:1, col:5) - Match alphaword at loc 7(1,8) - Matched alphaword -> ['xyz'] - Match alphaword at loc 11(1,12) - Exception raised:Expected alphaword (at char 12), (line:1, col:13) - Match alphaword at loc 15(1,16) - Exception raised:Expected alphaword (at char 15), (line:1, col:16) - - The output shown is that produced by the default debug actions - custom debug actions can be - specified using :class:`set_debug_actions`. Prior to attempting - to match the ``wd`` expression, the debugging message ``"Match at loc (,)"`` - is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"`` - message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression, - which makes debugging and exception messages easier to understand - for instance, the default - name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``. - """ - if recurse: - for expr in self.visit_all(): - expr.set_debug(flag, recurse=False) - return self - - if flag: - self.set_debug_actions( - _default_start_debug_action, - _default_success_debug_action, - _default_exception_debug_action, - ) - else: - self.debug = False - return self - - @property - def default_name(self) -> str: - if self._defaultName is None: - self._defaultName = self._generateDefaultName() - return self._defaultName - - @abstractmethod - def _generateDefaultName(self) -> str: - """ - Child classes must define this method, which defines how the ``default_name`` is set. - """ - - def set_name(self, name: str) -> "ParserElement": - """ - Define name for this expression, makes debugging and exception messages clearer. - - Example:: - - integer = Word(nums) - integer.parse_string("ABC") # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1) - - integer.set_name("integer") - integer.parse_string("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) - """ - self.customName = name - self.errmsg = f"Expected {self.name}" - if __diag__.enable_debug_on_named_expressions: - self.set_debug() - return self - - @property - def name(self) -> str: - # This will use a user-defined name if available, but otherwise defaults back to the auto-generated name - return self.customName if self.customName is not None else self.default_name - - def __str__(self) -> str: - return self.name - - def __repr__(self) -> str: - return str(self) - - def streamline(self) -> "ParserElement": - self.streamlined = True - self._defaultName = None - return self - - def recurse(self) -> List["ParserElement"]: - return [] - - def _checkRecursion(self, parseElementList): - subRecCheckList = parseElementList[:] + [self] - for e in self.recurse(): - e._checkRecursion(subRecCheckList) - - def validate(self, validateTrace=None) -> None: - """ - Check defined expressions for valid structure, check for infinite recursive definitions. 
- """ - warnings.warn( - "ParserElement.validate() is deprecated, and should not be used to check for left recursion", - DeprecationWarning, - stacklevel=2, - ) - self._checkRecursion([]) - - def parse_file( - self, - file_or_filename: Union[str, Path, TextIO], - encoding: str = "utf-8", - parse_all: bool = False, - *, - parseAll: bool = False, - ) -> ParseResults: - """ - Execute the parse expression on the given file or filename. - If a filename is specified (instead of a file object), - the entire file is opened, read, and closed before parsing. - """ - parseAll = parseAll or parse_all - try: - file_or_filename = typing.cast(TextIO, file_or_filename) - file_contents = file_or_filename.read() - except AttributeError: - file_or_filename = typing.cast(str, file_or_filename) - with open(file_or_filename, "r", encoding=encoding) as f: - file_contents = f.read() - try: - return self.parse_string(file_contents, parseAll) - except ParseBaseException as exc: - if ParserElement.verbose_stacktrace: - raise - - # catch and re-raise exception from here, clears out pyparsing internal stack trace - raise exc.with_traceback(None) - - def __eq__(self, other): - if self is other: - return True - elif isinstance(other, str_type): - return self.matches(other, parse_all=True) - elif isinstance(other, ParserElement): - return vars(self) == vars(other) - return False - - def __hash__(self): - return id(self) - - def matches( - self, test_string: str, parse_all: bool = True, *, parseAll: bool = True - ) -> bool: - """ - Method for quick testing of a parser against a test string. Good for simple - inline microtests of sub expressions while building up larger parser. - - Parameters: - - - ``test_string`` - to test against this expression for a match - - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests - - Example:: - - expr = Word(nums) - assert expr.matches("100") - """ - parseAll = parseAll and parse_all - try: - self.parse_string(str(test_string), parse_all=parseAll) - return True - except ParseBaseException: - return False - - def run_tests( - self, - tests: Union[str, List[str]], - parse_all: bool = True, - comment: typing.Optional[Union["ParserElement", str]] = "#", - full_dump: bool = True, - print_results: bool = True, - failure_tests: bool = False, - post_parse: typing.Optional[Callable[[str, ParseResults], str]] = None, - file: typing.Optional[TextIO] = None, - with_line_numbers: bool = False, - *, - parseAll: bool = True, - fullDump: bool = True, - printResults: bool = True, - failureTests: bool = False, - postParse: typing.Optional[Callable[[str, ParseResults], str]] = None, - ) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]: - """ - Execute the parse expression on a series of test strings, showing each - test, the parsed results or where the parse failed. Quick and easy way to - run a parse expression against a list of sample strings. 
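Because ``matches`` defaults to ``parse_all=True``, it answers 'does the whole string match' rather than 'does a prefix match', which makes it handy for inline assertions. Sketch::

    import pyparsing as pp

    integer = pp.Word(pp.nums)
    assert integer.matches("1024")
    assert not integer.matches("0x400")   # matches '0', but parse_all rejects the trailing 'x400'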
- - Parameters: - - - ``tests`` - a list of separate test strings, or a multiline string of test strings - - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests - - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test - string; pass None to disable comment filtering - - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline; - if False, only dump nested list - - ``print_results`` - (default= ``True``) prints test output to stdout - - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing - - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as - `fn(test_string, parse_results)` and returns a string to be added to the test output - - ``file`` - (default= ``None``) optional file-like object to which test output will be written; - if None, will default to ``sys.stdout`` - - ``with_line_numbers`` - default= ``False``) show test strings with line and column numbers - - Returns: a (success, results) tuple, where success indicates that all tests succeeded - (or failed if ``failure_tests`` is True), and the results contain a list of lines of each - test's output - - Example:: - - number_expr = pyparsing_common.number.copy() - - result = number_expr.run_tests(''' - # unsigned integer - 100 - # negative integer - -100 - # float with scientific notation - 6.02e23 - # integer with scientific notation - 1e-12 - ''') - print("Success" if result[0] else "Failed!") - - result = number_expr.run_tests(''' - # stray character - 100Z - # missing leading digit before '.' - -.100 - # too many '.' - 3.14.159 - ''', failure_tests=True) - print("Success" if result[0] else "Failed!") - - prints:: - - # unsigned integer - 100 - [100] - - # negative integer - -100 - [-100] - - # float with scientific notation - 6.02e23 - [6.02e+23] - - # integer with scientific notation - 1e-12 - [1e-12] - - Success - - # stray character - 100Z - ^ - FAIL: Expected end of text (at char 3), (line:1, col:4) - - # missing leading digit before '.' - -.100 - ^ - FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1) - - # too many '.' - 3.14.159 - ^ - FAIL: Expected end of text (at char 4), (line:1, col:5) - - Success - - Each test string must be on a single line. If you want to test a string that spans multiple - lines, create a test like this:: - - expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines") - - (Note that this is a raw string literal, you must include the leading ``'r'``.) 
- """ - from .testing import pyparsing_test - - parseAll = parseAll and parse_all - fullDump = fullDump and full_dump - printResults = printResults and print_results - failureTests = failureTests or failure_tests - postParse = postParse or post_parse - if isinstance(tests, str_type): - tests = typing.cast(str, tests) - line_strip = type(tests).strip - tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()] - comment_specified = comment is not None - if comment_specified: - if isinstance(comment, str_type): - comment = typing.cast(str, comment) - comment = Literal(comment) - comment = typing.cast(ParserElement, comment) - if file is None: - file = sys.stdout - print_ = file.write - - result: Union[ParseResults, Exception] - allResults: List[Tuple[str, Union[ParseResults, Exception]]] = [] - comments: List[str] = [] - success = True - NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string) - BOM = "\ufeff" - nlstr = "\n" - for t in tests: - if comment_specified and comment.matches(t, False) or comments and not t: - comments.append( - pyparsing_test.with_line_numbers(t) if with_line_numbers else t - ) - continue - if not t: - continue - out = [ - f"{nlstr}{nlstr.join(comments) if comments else ''}", - pyparsing_test.with_line_numbers(t) if with_line_numbers else t, - ] - comments = [] - try: - # convert newline marks to actual newlines, and strip leading BOM if present - t = NL.transform_string(t.lstrip(BOM)) - result = self.parse_string(t, parse_all=parseAll) - except ParseBaseException as pe: - fatal = "(FATAL) " if isinstance(pe, ParseFatalException) else "" - out.append(pe.explain()) - out.append(f"FAIL: {fatal}{pe}") - if ParserElement.verbose_stacktrace: - out.extend(traceback.format_tb(pe.__traceback__)) - success = success and failureTests - result = pe - except Exception as exc: - out.append(f"FAIL-EXCEPTION: {type(exc).__name__}: {exc}") - if ParserElement.verbose_stacktrace: - out.extend(traceback.format_tb(exc.__traceback__)) - success = success and failureTests - result = exc - else: - success = success and not failureTests - if postParse is not None: - try: - pp_value = postParse(t, result) - if pp_value is not None: - if isinstance(pp_value, ParseResults): - out.append(pp_value.dump()) - else: - out.append(str(pp_value)) - else: - out.append(result.dump()) - except Exception as e: - out.append(result.dump(full=fullDump)) - out.append( - f"{postParse.__name__} failed: {type(e).__name__}: {e}" - ) - else: - out.append(result.dump(full=fullDump)) - out.append("") - - if printResults: - print_("\n".join(out)) - - allResults.append((t, result)) - - return success, allResults - - def create_diagram( - self, - output_html: Union[TextIO, Path, str], - vertical: int = 3, - show_results_names: bool = False, - show_groups: bool = False, - embed: bool = False, - **kwargs, - ) -> None: - """ - Create a railroad diagram for the parser. 
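A compact ``run_tests`` call, for comparison with the longer docstring example above; ``date`` is a hypothetical grammar, and the triple-quoted block is the usual one-test-per-line form (blank lines are skipped)::

    import pyparsing as pp

    date = pp.Word(pp.nums, exact=4)("year") + "-" + pp.Word(pp.nums, exact=2)("month")
    success, _ = date.run_tests("""
        2024-01
        1999-12
    """)
    assert success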
- - Parameters: - - - ``output_html`` (str or file-like object) - output target for generated - diagram HTML - - ``vertical`` (int) - threshold for formatting multiple alternatives vertically - instead of horizontally (default=3) - - ``show_results_names`` - bool flag whether diagram should show annotations for - defined results names - - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box - - ``embed`` - bool flag whether generated HTML should omit , , and tags to embed - the resulting HTML in an enclosing HTML source - - ``head`` - str containing additional HTML to insert into the section of the generated code; - can be used to insert custom CSS styling - - ``body`` - str containing additional HTML to insert at the beginning of the section of the - generated code - - Additional diagram-formatting keyword arguments can also be included; - see railroad.Diagram class. - """ - - try: - from .diagram import to_railroad, railroad_to_html - except ImportError as ie: - raise Exception( - "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams" - ) from ie - - self.streamline() - - railroad = to_railroad( - self, - vertical=vertical, - show_results_names=show_results_names, - show_groups=show_groups, - diagram_kwargs=kwargs, - ) - if not isinstance(output_html, (str, Path)): - # we were passed a file-like object, just write to it - output_html.write(railroad_to_html(railroad, embed=embed, **kwargs)) - return - - with open(output_html, "w", encoding="utf-8") as diag_file: - diag_file.write(railroad_to_html(railroad, embed=embed, **kwargs)) - - # Compatibility synonyms - # fmt: off - inlineLiteralsUsing = replaced_by_pep8("inlineLiteralsUsing", inline_literals_using) - setDefaultWhitespaceChars = replaced_by_pep8( - "setDefaultWhitespaceChars", set_default_whitespace_chars - ) - setResultsName = replaced_by_pep8("setResultsName", set_results_name) - setBreak = replaced_by_pep8("setBreak", set_break) - setParseAction = replaced_by_pep8("setParseAction", set_parse_action) - addParseAction = replaced_by_pep8("addParseAction", add_parse_action) - addCondition = replaced_by_pep8("addCondition", add_condition) - setFailAction = replaced_by_pep8("setFailAction", set_fail_action) - tryParse = replaced_by_pep8("tryParse", try_parse) - enableLeftRecursion = replaced_by_pep8("enableLeftRecursion", enable_left_recursion) - enablePackrat = replaced_by_pep8("enablePackrat", enable_packrat) - parseString = replaced_by_pep8("parseString", parse_string) - scanString = replaced_by_pep8("scanString", scan_string) - transformString = replaced_by_pep8("transformString", transform_string) - searchString = replaced_by_pep8("searchString", search_string) - ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace) - leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace) - setWhitespaceChars = replaced_by_pep8("setWhitespaceChars", set_whitespace_chars) - parseWithTabs = replaced_by_pep8("parseWithTabs", parse_with_tabs) - setDebugActions = replaced_by_pep8("setDebugActions", set_debug_actions) - setDebug = replaced_by_pep8("setDebug", set_debug) - setName = replaced_by_pep8("setName", set_name) - parseFile = replaced_by_pep8("parseFile", parse_file) - runTests = replaced_by_pep8("runTests", run_tests) - canParseNext = can_parse_next - resetCache = reset_cache - defaultName = default_name - # fmt: on - - -class _PendingSkip(ParserElement): - # internal placeholder class to hold a place were '...' 
is added to a parser element, - # once another ParserElement is added, this placeholder will be replaced with a SkipTo - def __init__(self, expr: ParserElement, must_skip: bool = False): - super().__init__() - self.anchor = expr - self.must_skip = must_skip - - def _generateDefaultName(self) -> str: - return str(self.anchor + Empty()).replace("Empty", "...") - - def __add__(self, other) -> "ParserElement": - skipper = SkipTo(other).set_name("...")("_skipped*") - if self.must_skip: - - def must_skip(t): - if not t._skipped or t._skipped.as_list() == [""]: - del t[0] - t.pop("_skipped", None) - - def show_skip(t): - if t._skipped.as_list()[-1:] == [""]: - t.pop("_skipped") - t["_skipped"] = f"missing <{self.anchor!r}>" - - return ( - self.anchor + skipper().add_parse_action(must_skip) - | skipper().add_parse_action(show_skip) - ) + other - - return self.anchor + skipper + other - - def __repr__(self): - return self.defaultName - - def parseImpl(self, *args): - raise Exception( - "use of `...` expression without following SkipTo target expression" - ) - - -class Token(ParserElement): - """Abstract :class:`ParserElement` subclass, for defining atomic - matching patterns. - """ - - def __init__(self): - super().__init__(savelist=False) - - def _generateDefaultName(self) -> str: - return type(self).__name__ - - -class NoMatch(Token): - """ - A token that will never match. - """ - - def __init__(self): - super().__init__() - self.mayReturnEmpty = True - self.mayIndexError = False - self.errmsg = "Unmatchable token" - - def parseImpl(self, instring, loc, doActions=True): - raise ParseException(instring, loc, self.errmsg, self) - - -class Literal(Token): - """ - Token to exactly match a specified string. - - Example:: - - Literal('abc').parse_string('abc') # -> ['abc'] - Literal('abc').parse_string('abcdef') # -> ['abc'] - Literal('abc').parse_string('ab') # -> Exception: Expected "abc" - - For case-insensitive matching, use :class:`CaselessLiteral`. - - For keyword matching (force word break before and after the matched string), - use :class:`Keyword` or :class:`CaselessKeyword`. - """ - - def __new__(cls, match_string: str = "", *, matchString: str = ""): - # Performance tuning: select a subclass with optimized parseImpl - if cls is Literal: - match_string = matchString or match_string - if not match_string: - return super().__new__(Empty) - if len(match_string) == 1: - return super().__new__(_SingleCharLiteral) - - # Default behavior - return super().__new__(cls) - - # Needed to make copy.copy() work correctly if we customize __new__ - def __getnewargs__(self): - return (self.match,) - - def __init__(self, match_string: str = "", *, matchString: str = ""): - super().__init__() - match_string = matchString or match_string - self.match = match_string - self.matchLen = len(match_string) - self.firstMatchChar = match_string[:1] - self.errmsg = f"Expected {self.name}" - self.mayReturnEmpty = False - self.mayIndexError = False - - def _generateDefaultName(self) -> str: - return repr(self.match) - - def parseImpl(self, instring, loc, doActions=True): - if instring[loc] == self.firstMatchChar and instring.startswith( - self.match, loc - ): - return loc + self.matchLen, self.match - raise ParseException(instring, loc, self.errmsg, self) - - -class Empty(Literal): - """ - An empty token, will always match. 
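The ``Literal`` semantics above in runnable form; note that a plain ``Literal`` accepts a prefix match unless the caller asks for ``parse_all`` (standalone ``pyparsing`` assumed)::

    import pyparsing as pp

    print(pp.Literal("abc").parse_string("abcdef"))   # -> ['abc']
    try:
        pp.Literal("abc").parse_string("ab")
    except pp.ParseException as err:
        print(err)   # Expected 'abc', ...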
- """ - - def __init__(self, match_string="", *, matchString=""): - super().__init__("") - self.mayReturnEmpty = True - self.mayIndexError = False - - def _generateDefaultName(self) -> str: - return "Empty" - - def parseImpl(self, instring, loc, doActions=True): - return loc, [] - - -class _SingleCharLiteral(Literal): - def parseImpl(self, instring, loc, doActions=True): - if instring[loc] == self.firstMatchChar: - return loc + 1, self.match - raise ParseException(instring, loc, self.errmsg, self) - - -ParserElement._literalStringClass = Literal - - -class Keyword(Token): - """ - Token to exactly match a specified string as a keyword, that is, - it must be immediately preceded and followed by whitespace or - non-keyword characters. Compare with :class:`Literal`: - - - ``Literal("if")`` will match the leading ``'if'`` in - ``'ifAndOnlyIf'``. - - ``Keyword("if")`` will not; it will only match the leading - ``'if'`` in ``'if x=1'``, or ``'if(y==2)'`` - - Accepts two optional constructor arguments in addition to the - keyword string: - - - ``ident_chars`` is a string of characters that would be valid - identifier characters, defaulting to all alphanumerics + "_" and - "$" - - ``caseless`` allows case-insensitive matching, default is ``False``. - - Example:: - - Keyword("start").parse_string("start") # -> ['start'] - Keyword("start").parse_string("starting") # -> Exception - - For case-insensitive matching, use :class:`CaselessKeyword`. - """ - - DEFAULT_KEYWORD_CHARS = alphanums + "_$" - - def __init__( - self, - match_string: str = "", - ident_chars: typing.Optional[str] = None, - caseless: bool = False, - *, - matchString: str = "", - identChars: typing.Optional[str] = None, - ): - super().__init__() - identChars = identChars or ident_chars - if identChars is None: - identChars = Keyword.DEFAULT_KEYWORD_CHARS - match_string = matchString or match_string - self.match = match_string - self.matchLen = len(match_string) - try: - self.firstMatchChar = match_string[0] - except IndexError: - raise ValueError("null string passed to Keyword; use Empty() instead") - self.errmsg = f"Expected {type(self).__name__} {self.name}" - self.mayReturnEmpty = False - self.mayIndexError = False - self.caseless = caseless - if caseless: - self.caselessmatch = match_string.upper() - identChars = identChars.upper() - self.identChars = set(identChars) - - def _generateDefaultName(self) -> str: - return repr(self.match) - - def parseImpl(self, instring, loc, doActions=True): - errmsg = self.errmsg - errloc = loc - if self.caseless: - if instring[loc : loc + self.matchLen].upper() == self.caselessmatch: - if loc == 0 or instring[loc - 1].upper() not in self.identChars: - if ( - loc >= len(instring) - self.matchLen - or instring[loc + self.matchLen].upper() not in self.identChars - ): - return loc + self.matchLen, self.match - - # followed by keyword char - errmsg += ", was immediately followed by keyword character" - errloc = loc + self.matchLen - else: - # preceded by keyword char - errmsg += ", keyword was immediately preceded by keyword character" - errloc = loc - 1 - # else no match just raise plain exception - - elif ( - instring[loc] == self.firstMatchChar - and self.matchLen == 1 - or instring.startswith(self.match, loc) - ): - if loc == 0 or instring[loc - 1] not in self.identChars: - if ( - loc >= len(instring) - self.matchLen - or instring[loc + self.matchLen] not in self.identChars - ): - return loc + self.matchLen, self.match - - # followed by keyword char - errmsg += ", keyword was immediately followed by 
keyword character" - errloc = loc + self.matchLen - else: - # preceded by keyword char - errmsg += ", keyword was immediately preceded by keyword character" - errloc = loc - 1 - # else no match just raise plain exception - - raise ParseException(instring, errloc, errmsg, self) - - @staticmethod - def set_default_keyword_chars(chars) -> None: - """ - Overrides the default characters used by :class:`Keyword` expressions. - """ - Keyword.DEFAULT_KEYWORD_CHARS = chars - - setDefaultKeywordChars = set_default_keyword_chars - - -class CaselessLiteral(Literal): - """ - Token to match a specified string, ignoring case of letters. - Note: the matched results will always be in the case of the given - match string, NOT the case of the input text. - - Example:: - - CaselessLiteral("CMD")[1, ...].parse_string("cmd CMD Cmd10") - # -> ['CMD', 'CMD', 'CMD'] - - (Contrast with example for :class:`CaselessKeyword`.) - """ - - def __init__(self, match_string: str = "", *, matchString: str = ""): - match_string = matchString or match_string - super().__init__(match_string.upper()) - # Preserve the defining literal. - self.returnString = match_string - self.errmsg = f"Expected {self.name}" - - def parseImpl(self, instring, loc, doActions=True): - if instring[loc : loc + self.matchLen].upper() == self.match: - return loc + self.matchLen, self.returnString - raise ParseException(instring, loc, self.errmsg, self) - - -class CaselessKeyword(Keyword): - """ - Caseless version of :class:`Keyword`. - - Example:: - - CaselessKeyword("CMD")[1, ...].parse_string("cmd CMD Cmd10") - # -> ['CMD', 'CMD'] - - (Contrast with example for :class:`CaselessLiteral`.) - """ - - def __init__( - self, - match_string: str = "", - ident_chars: typing.Optional[str] = None, - *, - matchString: str = "", - identChars: typing.Optional[str] = None, - ): - identChars = identChars or ident_chars - match_string = matchString or match_string - super().__init__(match_string, identChars, caseless=True) - - -class CloseMatch(Token): - """A variation on :class:`Literal` which matches "close" matches, - that is, strings with at most 'n' mismatching characters. - :class:`CloseMatch` takes parameters: - - - ``match_string`` - string to be matched - - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters - - ``max_mismatches`` - (``default=1``) maximum number of - mismatches allowed to count as a match - - The results from a successful parse will contain the matched text - from the input string and the following named results: - - - ``mismatches`` - a list of the positions within the - match_string where mismatches were found - - ``original`` - the original match_string used to compare - against the input string - - If ``mismatches`` is an empty list, then the match was an exact - match. 
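A minimal sketch of the ``caseless`` flag (not part of the original docstring; assumes pyparsing 3.1+, where ``caseless`` was added, imported as ``pp``)::

    import pyparsing as pp

    patt = pp.CloseMatch("ATCATCGAATGGA", caseless=True)
    result = patt.parse_string("atcatcgaaXgga")
    print(result[0])             # -> 'atcatcgaaXgga'
    print(result["mismatches"])  # -> [9]  (case differences do not count as mismatches)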
- - Example:: - - patt = CloseMatch("ATCATCGAATGGA") - patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']}) - patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1) - - # exact match - patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']}) - - # close match allowing up to 2 mismatches - patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2) - patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']}) - """ - - def __init__( - self, - match_string: str, - max_mismatches: typing.Optional[int] = None, - *, - maxMismatches: int = 1, - caseless=False, - ): - maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches - super().__init__() - self.match_string = match_string - self.maxMismatches = maxMismatches - self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)" - self.caseless = caseless - self.mayIndexError = False - self.mayReturnEmpty = False - - def _generateDefaultName(self) -> str: - return f"{type(self).__name__}:{self.match_string!r}" - - def parseImpl(self, instring, loc, doActions=True): - start = loc - instrlen = len(instring) - maxloc = start + len(self.match_string) - - if maxloc <= instrlen: - match_string = self.match_string - match_stringloc = 0 - mismatches = [] - maxMismatches = self.maxMismatches - - for match_stringloc, s_m in enumerate( - zip(instring[loc:maxloc], match_string) - ): - src, mat = s_m - if self.caseless: - src, mat = src.lower(), mat.lower() - - if src != mat: - mismatches.append(match_stringloc) - if len(mismatches) > maxMismatches: - break - else: - loc = start + match_stringloc + 1 - results = ParseResults([instring[start:loc]]) - results["original"] = match_string - results["mismatches"] = mismatches - return loc, results - - raise ParseException(instring, loc, self.errmsg, self) - - -class Word(Token): - """Token for matching words composed of allowed character sets. - - Parameters: - - - ``init_chars`` - string of all characters that should be used to - match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.; - if ``body_chars`` is also specified, then this is the string of - initial characters - - ``body_chars`` - string of characters that - can be used for matching after a matched initial character as - given in ``init_chars``; if omitted, same as the initial characters - (default=``None``) - - ``min`` - minimum number of characters to match (default=1) - - ``max`` - maximum number of characters to match (default=0) - - ``exact`` - exact number of characters to match (default=0) - - ``as_keyword`` - match as a keyword (default=``False``) - - ``exclude_chars`` - characters that might be - found in the input ``body_chars`` string but which should not be - accepted for matching ;useful to define a word of all - printables except for one or two characters, for instance - (default=``None``) - - :class:`srange` is useful for defining custom character set strings - for defining :class:`Word` expressions, using range notation from - regular expression character sets. - - A common mistake is to use :class:`Word` to match a specific literal - string, as in ``Word("Address")``. Remember that :class:`Word` - uses the string argument to define *sets* of matchable characters. 
- This expression would match "Add", "AAA", "dAred", or any other word - made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an - exact literal string, use :class:`Literal` or :class:`Keyword`. - - pyparsing includes helper strings for building Words: - - - :class:`alphas` - - :class:`nums` - - :class:`alphanums` - - :class:`hexnums` - - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255 - - accented, tilded, umlauted, etc.) - - :class:`punc8bit` (non-alphabetic characters in ASCII range - 128-255 - currency, symbols, superscripts, diacriticals, etc.) - - :class:`printables` (any non-whitespace character) - - ``alphas``, ``nums``, and ``printables`` are also defined in several - Unicode sets - see :class:`pyparsing_unicode``. - - Example:: - - # a word composed of digits - integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) - - # a word with a leading capital, and zero or more lowercase - capitalized_word = Word(alphas.upper(), alphas.lower()) - - # hostnames are alphanumeric, with leading alpha, and '-' - hostname = Word(alphas, alphanums + '-') - - # roman numeral (not a strict parser, accepts invalid mix of characters) - roman = Word("IVXLCDM") - - # any string of non-whitespace characters, except for ',' - csv_value = Word(printables, exclude_chars=",") - """ - - def __init__( - self, - init_chars: str = "", - body_chars: typing.Optional[str] = None, - min: int = 1, - max: int = 0, - exact: int = 0, - as_keyword: bool = False, - exclude_chars: typing.Optional[str] = None, - *, - initChars: typing.Optional[str] = None, - bodyChars: typing.Optional[str] = None, - asKeyword: bool = False, - excludeChars: typing.Optional[str] = None, - ): - initChars = initChars or init_chars - bodyChars = bodyChars or body_chars - asKeyword = asKeyword or as_keyword - excludeChars = excludeChars or exclude_chars - super().__init__() - if not initChars: - raise ValueError( - f"invalid {type(self).__name__}, initChars cannot be empty string" - ) - - initChars_set = set(initChars) - if excludeChars: - excludeChars_set = set(excludeChars) - initChars_set -= excludeChars_set - if bodyChars: - bodyChars = "".join(set(bodyChars) - excludeChars_set) - self.initChars = initChars_set - self.initCharsOrig = "".join(sorted(initChars_set)) - - if bodyChars: - self.bodyChars = set(bodyChars) - self.bodyCharsOrig = "".join(sorted(bodyChars)) - else: - self.bodyChars = initChars_set - self.bodyCharsOrig = self.initCharsOrig - - self.maxSpecified = max > 0 - - if min < 1: - raise ValueError( - "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted" - ) - - if self.maxSpecified and min > max: - raise ValueError( - f"invalid args, if min and max both specified min must be <= max (min={min}, max={max})" - ) - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - min = max = exact - self.maxLen = exact - self.minLen = exact - - self.errmsg = f"Expected {self.name}" - self.mayIndexError = False - self.asKeyword = asKeyword - if self.asKeyword: - self.errmsg += " as a keyword" - - # see if we can make a regex for this Word - if " " not in (self.initChars | self.bodyChars): - if len(self.initChars) == 1: - re_leading_fragment = re.escape(self.initCharsOrig) - else: - re_leading_fragment = f"[{_collapse_string_to_ranges(self.initChars)}]" - - if self.bodyChars == self.initChars: - if max == 0 and self.minLen == 1: - repeat = "+" - elif max == 1: - repeat = "" - else: - if self.minLen != 
self.maxLen: - repeat = f"{{{self.minLen},{'' if self.maxLen == _MAX_INT else self.maxLen}}}" - else: - repeat = f"{{{self.minLen}}}" - self.reString = f"{re_leading_fragment}{repeat}" - else: - if max == 1: - re_body_fragment = "" - repeat = "" - else: - re_body_fragment = f"[{_collapse_string_to_ranges(self.bodyChars)}]" - if max == 0 and self.minLen == 1: - repeat = "*" - elif max == 2: - repeat = "?" if min <= 1 else "" - else: - if min != max: - repeat = f"{{{min - 1 if min > 0 else ''},{max - 1 if max > 0 else ''}}}" - else: - repeat = f"{{{min - 1 if min > 0 else ''}}}" - - self.reString = f"{re_leading_fragment}{re_body_fragment}{repeat}" - - if self.asKeyword: - self.reString = rf"\b{self.reString}\b" - - try: - self.re = re.compile(self.reString) - except re.error: - self.re = None # type: ignore[assignment] - else: - self.re_match = self.re.match - self.parseImpl = self.parseImpl_regex # type: ignore[assignment] - - def _generateDefaultName(self) -> str: - def charsAsStr(s): - max_repr_len = 16 - s = _collapse_string_to_ranges(s, re_escape=False) - - if len(s) > max_repr_len: - return s[: max_repr_len - 3] + "..." - - return s - - if self.initChars != self.bodyChars: - base = f"W:({charsAsStr(self.initChars)}, {charsAsStr(self.bodyChars)})" - else: - base = f"W:({charsAsStr(self.initChars)})" - - # add length specification - if self.minLen > 1 or self.maxLen != _MAX_INT: - if self.minLen == self.maxLen: - if self.minLen == 1: - return base[2:] - else: - return base + f"{{{self.minLen}}}" - elif self.maxLen == _MAX_INT: - return base + f"{{{self.minLen},...}}" - else: - return base + f"{{{self.minLen},{self.maxLen}}}" - return base - - def parseImpl(self, instring, loc, doActions=True): - if instring[loc] not in self.initChars: - raise ParseException(instring, loc, self.errmsg, self) - - start = loc - loc += 1 - instrlen = len(instring) - bodychars = self.bodyChars - maxloc = start + self.maxLen - maxloc = min(maxloc, instrlen) - while loc < maxloc and instring[loc] in bodychars: - loc += 1 - - throwException = False - if loc - start < self.minLen: - throwException = True - elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars: - throwException = True - elif self.asKeyword and ( - (start > 0 and instring[start - 1] in bodychars) - or (loc < instrlen and instring[loc] in bodychars) - ): - throwException = True - - if throwException: - raise ParseException(instring, loc, self.errmsg, self) - - return loc, instring[start:loc] - - def parseImpl_regex(self, instring, loc, doActions=True): - result = self.re_match(instring, loc) - if not result: - raise ParseException(instring, loc, self.errmsg, self) - - loc = result.end() - return loc, result.group() - - -class Char(Word): - """A short-cut class for defining :class:`Word` ``(characters, exact=1)``, - when defining a match of any single character in a string of - characters. - """ - - def __init__( - self, - charset: str, - as_keyword: bool = False, - exclude_chars: typing.Optional[str] = None, - *, - asKeyword: bool = False, - excludeChars: typing.Optional[str] = None, - ): - asKeyword = asKeyword or as_keyword - excludeChars = excludeChars or exclude_chars - super().__init__( - charset, exact=1, as_keyword=asKeyword, exclude_chars=excludeChars - ) - - -class Regex(Token): - r"""Token for matching strings that match a given regular - expression. Defined with string specifying the regular expression in - a form recognized by the stdlib Python `re module `_. 
- If the given regex contains named groups (defined using ``(?P<name>...)``),
- these will be preserved as named :class:`ParseResults`.
-
- If instead of the Python stdlib ``re`` module you wish to use a different RE module
- (such as the ``regex`` module), you can do so by building your ``Regex`` object with
- a compiled RE that was compiled using ``regex``.
-
- Example::
-
- realnum = Regex(r"[+-]?\d+\.\d*")
- # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
- roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
-
- # named fields in a regex will be returned as named results
- date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
-
- # the Regex class will accept re's compiled using the regex module
- import regex
- parser = pp.Regex(regex.compile(r'[0-9]'))
- """
-
- def __init__(
- self,
- pattern: Any,
- flags: Union[re.RegexFlag, int] = 0,
- as_group_list: bool = False,
- as_match: bool = False,
- *,
- asGroupList: bool = False,
- asMatch: bool = False,
- ):
- """The parameters ``pattern`` and ``flags`` are passed
- to the ``re.compile()`` function as-is. See the Python
- `re module <https://docs.python.org/3/library/re.html>`_ for an
- explanation of the acceptable patterns and flags.
- """
- super().__init__()
- asGroupList = asGroupList or as_group_list
- asMatch = asMatch or as_match
-
- if isinstance(pattern, str_type):
- if not pattern:
- raise ValueError("null string passed to Regex; use Empty() instead")
-
- self._re = None
- self.reString = self.pattern = pattern
- self.flags = flags
-
- elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
- self._re = pattern
- self.pattern = self.reString = pattern.pattern
- self.flags = flags
-
- else:
- raise TypeError(
- "Regex may only be constructed with a string or a compiled RE object"
- )
-
- self.errmsg = f"Expected {self.name}"
- self.mayIndexError = False
- self.asGroupList = asGroupList
- self.asMatch = asMatch
- if self.asGroupList:
- self.parseImpl = self.parseImplAsGroupList # type: ignore [assignment]
- if self.asMatch:
- self.parseImpl = self.parseImplAsMatch # type: ignore [assignment]
-
- @cached_property
- def re(self):
- if self._re:
- return self._re
-
- try:
- return re.compile(self.pattern, self.flags)
- except re.error:
- raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex")
-
- @cached_property
- def re_match(self):
- return self.re.match
-
- @cached_property
- def mayReturnEmpty(self):
- return self.re_match("") is not None
-
- def _generateDefaultName(self) -> str:
- return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\"))
-
- def parseImpl(self, instring, loc, doActions=True):
- result = self.re_match(instring, loc)
- if not result:
- raise ParseException(instring, loc, self.errmsg, self)
-
- loc = result.end()
- ret = ParseResults(result.group())
- d = result.groupdict()
-
- for k, v in d.items():
- ret[k] = v
-
- return loc, ret
-
- def parseImplAsGroupList(self, instring, loc, doActions=True):
- result = self.re_match(instring, loc)
- if not result:
- raise ParseException(instring, loc, self.errmsg, self)
-
- loc = result.end()
- ret = result.groups()
- return loc, ret
-
- def parseImplAsMatch(self, instring, loc, doActions=True):
- result = self.re_match(instring, loc)
- if not result:
- raise ParseException(instring, loc, self.errmsg, self)
-
- loc = result.end()
- ret = result
- return loc, ret
-
- def sub(self, repl: str) -> ParserElement:
- r"""
- Return :class:`Regex` with an attached parse action to transform the parsed
- result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.
-
- Example::
-
- make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
- print(make_html.transform_string("h1:main title:"))
- # prints "<h1>main title</h1>"
- """
- if self.asGroupList:
- raise TypeError("cannot use sub() with Regex(as_group_list=True)")
-
- if self.asMatch and callable(repl):
- raise TypeError(
- "cannot use sub() with a callable with Regex(as_match=True)"
- )
-
- if self.asMatch:
-
- def pa(tokens):
- return tokens[0].expand(repl)
-
- else:
-
- def pa(tokens):
- return self.re.sub(repl, tokens[0])
-
- return self.add_parse_action(pa)
-
-
- class QuotedString(Token):
- r"""
- Token for matching strings that are delimited by quoting characters.
-
- Defined with the following parameters:
-
- - ``quote_char`` - string of one or more characters defining the
- quote delimiting string
- - ``esc_char`` - character to escape quotes, typically backslash
- (default= ``None``)
- - ``esc_quote`` - special quote sequence to escape an embedded quote
- string (such as SQL's ``""`` to escape an embedded ``"``)
- (default= ``None``)
- - ``multiline`` - boolean indicating whether quotes can span
- multiple lines (default= ``False``)
- - ``unquote_results`` - boolean indicating whether the matched text
- should be unquoted (default= ``True``)
- - ``end_quote_char`` - string of one or more characters defining the
- end of the quote delimited string (default= ``None`` => same as
- quote_char)
- - ``convert_whitespace_escapes`` - convert escaped whitespace
- (``'\t'``, ``'\n'``, etc.) to actual whitespace
- (default= ``True``)
-
- Example::
-
- qs = QuotedString('"')
- print(qs.search_string('lsjdf "This is the quote" sldjf'))
- complex_qs = QuotedString('{{', end_quote_char='}}')
- print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
- sql_qs = QuotedString('"', esc_quote='""')
- print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
-
- prints::
-
- [['This is the quote']]
- [['This is the "quote"']]
- [['This is the quote with "embedded" quotes']]
- """
-
- ws_map = dict(((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")))
-
- def __init__(
- self,
- quote_char: str = "",
- esc_char: typing.Optional[str] = None,
- esc_quote: typing.Optional[str] = None,
- multiline: bool = False,
- unquote_results: bool = True,
- end_quote_char: typing.Optional[str] = None,
- convert_whitespace_escapes: bool = True,
- *,
- quoteChar: str = "",
- escChar: typing.Optional[str] = None,
- escQuote: typing.Optional[str] = None,
- unquoteResults: bool = True,
- endQuoteChar: typing.Optional[str] = None,
- convertWhitespaceEscapes: bool = True,
- ):
- super().__init__()
- esc_char = escChar or esc_char
- esc_quote = escQuote or esc_quote
- unquote_results = unquoteResults and unquote_results
- end_quote_char = endQuoteChar or end_quote_char
- convert_whitespace_escapes = (
- convertWhitespaceEscapes and convert_whitespace_escapes
- )
- quote_char = quoteChar or quote_char
-
- # remove white space from quote chars
- quote_char = quote_char.strip()
- if not quote_char:
- raise ValueError("quote_char cannot be the empty string")
-
- if end_quote_char is None:
- end_quote_char = quote_char
- else:
- end_quote_char = end_quote_char.strip()
- if not end_quote_char:
- raise ValueError("end_quote_char cannot be the empty string")
-
- self.quote_char: str = quote_char
- self.quote_char_len: int = len(quote_char)
- self.first_quote_char: str = quote_char[0]
- self.end_quote_char: str = end_quote_char
- self.end_quote_char_len: int = len(end_quote_char)
- self.esc_char: str = esc_char or ""
- self.has_esc_char: bool = esc_char is not None
- self.esc_quote: str = esc_quote or ""
- self.unquote_results: bool =
unquote_results - self.convert_whitespace_escapes: bool = convert_whitespace_escapes - self.multiline = multiline - self.re_flags = re.RegexFlag(0) - - # fmt: off - # build up re pattern for the content between the quote delimiters - inner_pattern = [] - - if esc_quote: - inner_pattern.append(rf"(?:{re.escape(esc_quote)})") - - if esc_char: - inner_pattern.append(rf"(?:{re.escape(esc_char)}.)") - - if len(self.end_quote_char) > 1: - inner_pattern.append( - "(?:" - + "|".join( - f"(?:{re.escape(self.end_quote_char[:i])}(?!{re.escape(self.end_quote_char[i:])}))" - for i in range(len(self.end_quote_char) - 1, 0, -1) - ) - + ")" - ) - - if self.multiline: - self.re_flags |= re.MULTILINE | re.DOTALL - inner_pattern.append( - rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}" - rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])" - ) - else: - inner_pattern.append( - rf"(?:[^{_escape_regex_range_chars(self.end_quote_char[0])}\n\r" - rf"{(_escape_regex_range_chars(esc_char) if self.has_esc_char else '')}])" - ) - - self.pattern = "".join( - [ - re.escape(self.quote_char), - "(?:", - '|'.join(inner_pattern), - ")*", - re.escape(self.end_quote_char), - ] - ) - - if self.unquote_results: - if self.convert_whitespace_escapes: - self.unquote_scan_re = re.compile( - rf"({'|'.join(re.escape(k) for k in self.ws_map)})" - rf"|({re.escape(self.esc_char)}.)" - rf"|(\n|.)", - flags=self.re_flags, - ) - else: - self.unquote_scan_re = re.compile( - rf"({re.escape(self.esc_char)}.)" - rf"|(\n|.)", - flags=self.re_flags - ) - # fmt: on - - try: - self.re = re.compile(self.pattern, self.re_flags) - self.reString = self.pattern - self.re_match = self.re.match - except re.error: - raise ValueError(f"invalid pattern {self.pattern!r} passed to Regex") - - self.errmsg = f"Expected {self.name}" - self.mayIndexError = False - self.mayReturnEmpty = True - - def _generateDefaultName(self) -> str: - if self.quote_char == self.end_quote_char and isinstance( - self.quote_char, str_type - ): - return f"string enclosed in {self.quote_char!r}" - - return f"quoted string, starting with {self.quote_char} ending with {self.end_quote_char}" - - def parseImpl(self, instring, loc, doActions=True): - # check first character of opening quote to see if that is a match - # before doing the more complicated regex match - result = ( - instring[loc] == self.first_quote_char - and self.re_match(instring, loc) - or None - ) - if not result: - raise ParseException(instring, loc, self.errmsg, self) - - # get ending loc and matched string from regex matching result - loc = result.end() - ret = result.group() - - if self.unquote_results: - # strip off quotes - ret = ret[self.quote_char_len : -self.end_quote_char_len] - - if isinstance(ret, str_type): - # fmt: off - if self.convert_whitespace_escapes: - # as we iterate over matches in the input string, - # collect from whichever match group of the unquote_scan_re - # regex matches (only 1 group will match at any given time) - ret = "".join( - # match group 1 matches \t, \n, etc. 
- self.ws_map[match.group(1)] if match.group(1) - # match group 2 matches escaped characters - else match.group(2)[-1] if match.group(2) - # match group 3 matches any character - else match.group(3) - for match in self.unquote_scan_re.finditer(ret) - ) - else: - ret = "".join( - # match group 1 matches escaped characters - match.group(1)[-1] if match.group(1) - # match group 2 matches any character - else match.group(2) - for match in self.unquote_scan_re.finditer(ret) - ) - # fmt: on - - # replace escaped quotes - if self.esc_quote: - ret = ret.replace(self.esc_quote, self.end_quote_char) - - return loc, ret - - -class CharsNotIn(Token): - """Token for matching words composed of characters *not* in a given - set (will include whitespace in matched characters if not listed in - the provided exclusion set - see example). Defined with string - containing all disallowed characters, and an optional minimum, - maximum, and/or exact length. The default value for ``min`` is - 1 (a minimum value < 1 is not valid); the default values for - ``max`` and ``exact`` are 0, meaning no maximum or exact - length restriction. - - Example:: - - # define a comma-separated-value as anything that is not a ',' - csv_value = CharsNotIn(',') - print(DelimitedList(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213")) - - prints:: - - ['dkls', 'lsdkjf', 's12 34', '@!#', '213'] - """ - - def __init__( - self, - not_chars: str = "", - min: int = 1, - max: int = 0, - exact: int = 0, - *, - notChars: str = "", - ): - super().__init__() - self.skipWhitespace = False - self.notChars = not_chars or notChars - self.notCharsSet = set(self.notChars) - - if min < 1: - raise ValueError( - "cannot specify a minimum length < 1; use" - " Opt(CharsNotIn()) if zero-length char group is permitted" - ) - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - self.errmsg = f"Expected {self.name}" - self.mayReturnEmpty = self.minLen == 0 - self.mayIndexError = False - - def _generateDefaultName(self) -> str: - not_chars_str = _collapse_string_to_ranges(self.notChars) - if len(not_chars_str) > 16: - return f"!W:({self.notChars[: 16 - 3]}...)" - else: - return f"!W:({self.notChars})" - - def parseImpl(self, instring, loc, doActions=True): - notchars = self.notCharsSet - if instring[loc] in notchars: - raise ParseException(instring, loc, self.errmsg, self) - - start = loc - loc += 1 - maxlen = min(start + self.maxLen, len(instring)) - while loc < maxlen and instring[loc] not in notchars: - loc += 1 - - if loc - start < self.minLen: - raise ParseException(instring, loc, self.errmsg, self) - - return loc, instring[start:loc] - - -class White(Token): - """Special matching class for matching whitespace. Normally, - whitespace is ignored by pyparsing grammars. This class is included - when some whitespace structures are significant. Define with - a string containing the whitespace characters to be matched; default - is ``" \\t\\r\\n"``. Also takes optional ``min``, - ``max``, and ``exact`` arguments, as defined for the - :class:`Word` class. 
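``White`` itself ships no usage example in its docstring; a minimal sketch (mine, not from the removed source, assuming pyparsing 3.x imported as ``pp``) of treating tabs as significant separators::

    import pyparsing as pp

    tab = pp.White("\t")
    row = pp.Word(pp.alphas) + tab.suppress() + pp.Word(pp.nums)
    print(row.parse_string("alpha\t42"))  # -> ['alpha', '42']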
- """ - - whiteStrs = { - " ": "", - "\t": "", - "\n": "", - "\r": "", - "\f": "", - "\u00A0": "", - "\u1680": "", - "\u180E": "", - "\u2000": "", - "\u2001": "", - "\u2002": "", - "\u2003": "", - "\u2004": "", - "\u2005": "", - "\u2006": "", - "\u2007": "", - "\u2008": "", - "\u2009": "", - "\u200A": "", - "\u200B": "", - "\u202F": "", - "\u205F": "", - "\u3000": "", - } - - def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0): - super().__init__() - self.matchWhite = ws - self.set_whitespace_chars( - "".join(c for c in self.whiteStrs if c not in self.matchWhite), - copy_defaults=True, - ) - # self.leave_whitespace() - self.mayReturnEmpty = True - self.errmsg = f"Expected {self.name}" - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - def _generateDefaultName(self) -> str: - return "".join(White.whiteStrs[c] for c in self.matchWhite) - - def parseImpl(self, instring, loc, doActions=True): - if instring[loc] not in self.matchWhite: - raise ParseException(instring, loc, self.errmsg, self) - start = loc - loc += 1 - maxloc = start + self.maxLen - maxloc = min(maxloc, len(instring)) - while loc < maxloc and instring[loc] in self.matchWhite: - loc += 1 - - if loc - start < self.minLen: - raise ParseException(instring, loc, self.errmsg, self) - - return loc, instring[start:loc] - - -class PositionToken(Token): - def __init__(self): - super().__init__() - self.mayReturnEmpty = True - self.mayIndexError = False - - -class GoToColumn(PositionToken): - """Token to advance to a specific column of input text; useful for - tabular report scraping. - """ - - def __init__(self, colno: int): - super().__init__() - self.col = colno - - def preParse(self, instring: str, loc: int) -> int: - if col(loc, instring) == self.col: - return loc - - instrlen = len(instring) - if self.ignoreExprs: - loc = self._skipIgnorables(instring, loc) - while ( - loc < instrlen - and instring[loc].isspace() - and col(loc, instring) != self.col - ): - loc += 1 - - return loc - - def parseImpl(self, instring, loc, doActions=True): - thiscol = col(loc, instring) - if thiscol > self.col: - raise ParseException(instring, loc, "Text not in expected column", self) - newloc = loc + self.col - thiscol - ret = instring[loc:newloc] - return newloc, ret - - -class LineStart(PositionToken): - r"""Matches if current position is at the beginning of a line within - the parse string - - Example:: - - test = '''\ - AAA this line - AAA and this line - AAA but not this one - B AAA and definitely not this one - ''' - - for t in (LineStart() + 'AAA' + rest_of_line).search_string(test): - print(t) - - prints:: - - ['AAA', ' this line'] - ['AAA', ' and this line'] - - """ - - def __init__(self): - super().__init__() - self.leave_whitespace() - self.orig_whiteChars = set() | self.whiteChars - self.whiteChars.discard("\n") - self.skipper = Empty().set_whitespace_chars(self.whiteChars) - self.errmsg = "Expected start of line" - - def preParse(self, instring: str, loc: int) -> int: - if loc == 0: - return loc - - ret = self.skipper.preParse(instring, loc) - - if "\n" in self.orig_whiteChars: - while instring[ret : ret + 1] == "\n": - ret = self.skipper.preParse(instring, ret + 1) - - return ret - - def parseImpl(self, instring, loc, doActions=True): - if col(loc, instring) == 1: - return loc, [] - raise ParseException(instring, loc, self.errmsg, self) - - -class LineEnd(PositionToken): - """Matches if current position 
is at the end of a line within the - parse string - """ - - def __init__(self): - super().__init__() - self.whiteChars.discard("\n") - self.set_whitespace_chars(self.whiteChars, copy_defaults=False) - self.errmsg = "Expected end of line" - - def parseImpl(self, instring, loc, doActions=True): - if loc < len(instring): - if instring[loc] == "\n": - return loc + 1, "\n" - else: - raise ParseException(instring, loc, self.errmsg, self) - elif loc == len(instring): - return loc + 1, [] - else: - raise ParseException(instring, loc, self.errmsg, self) - - -class StringStart(PositionToken): - """Matches if current position is at the beginning of the parse - string - """ - - def __init__(self): - super().__init__() - self.errmsg = "Expected start of text" - - def parseImpl(self, instring, loc, doActions=True): - # see if entire string up to here is just whitespace and ignoreables - if loc != 0 and loc != self.preParse(instring, 0): - raise ParseException(instring, loc, self.errmsg, self) - - return loc, [] - - -class StringEnd(PositionToken): - """ - Matches if current position is at the end of the parse string - """ - - def __init__(self): - super().__init__() - self.errmsg = "Expected end of text" - - def parseImpl(self, instring, loc, doActions=True): - if loc < len(instring): - raise ParseException(instring, loc, self.errmsg, self) - if loc == len(instring): - return loc + 1, [] - if loc > len(instring): - return loc, [] - - raise ParseException(instring, loc, self.errmsg, self) - - -class WordStart(PositionToken): - """Matches if the current position is at the beginning of a - :class:`Word`, and is not preceded by any character in a given - set of ``word_chars`` (default= ``printables``). To emulate the - ``\b`` behavior of regular expressions, use - ``WordStart(alphanums)``. ``WordStart`` will also match at - the beginning of the string being parsed, or at the beginning of - a line. - """ - - def __init__(self, word_chars: str = printables, *, wordChars: str = printables): - wordChars = word_chars if wordChars == printables else wordChars - super().__init__() - self.wordChars = set(wordChars) - self.errmsg = "Not at the start of a word" - - def parseImpl(self, instring, loc, doActions=True): - if loc != 0: - if ( - instring[loc - 1] in self.wordChars - or instring[loc] not in self.wordChars - ): - raise ParseException(instring, loc, self.errmsg, self) - return loc, [] - - -class WordEnd(PositionToken): - """Matches if the current position is at the end of a :class:`Word`, - and is not followed by any character in a given set of ``word_chars`` - (default= ``printables``). To emulate the ``\b`` behavior of - regular expressions, use ``WordEnd(alphanums)``. ``WordEnd`` - will also match at the end of the string being parsed, or at the end - of a line. - """ - - def __init__(self, word_chars: str = printables, *, wordChars: str = printables): - wordChars = word_chars if wordChars == printables else wordChars - super().__init__() - self.wordChars = set(wordChars) - self.skipWhitespace = False - self.errmsg = "Not at the end of a word" - - def parseImpl(self, instring, loc, doActions=True): - instrlen = len(instring) - if instrlen > 0 and loc < instrlen: - if ( - instring[loc] in self.wordChars - or instring[loc - 1] not in self.wordChars - ): - raise ParseException(instring, loc, self.errmsg, self) - return loc, [] - - -class ParseExpression(ParserElement): - """Abstract subclass of ParserElement, for combining and - post-processing parsed tokens. 
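A minimal sketch (not from the removed source, assuming pyparsing 3.x imported as ``pp``) showing that the combining operators each build a concrete ``ParseExpression`` subclass::

    import pyparsing as pp

    a, b = pp.Literal("a"), pp.Literal("b")
    for expr in (a + b, a | b, a ^ b, a & b):
        print(type(expr).__name__)  # -> And, MatchFirst, Or, Each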
- """ - - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): - super().__init__(savelist) - self.exprs: List[ParserElement] - if isinstance(exprs, _generatorType): - exprs = list(exprs) - - if isinstance(exprs, str_type): - self.exprs = [self._literalStringClass(exprs)] - elif isinstance(exprs, ParserElement): - self.exprs = [exprs] - elif isinstance(exprs, Iterable): - exprs = list(exprs) - # if sequence of strings provided, wrap with Literal - if any(isinstance(expr, str_type) for expr in exprs): - exprs = ( - self._literalStringClass(e) if isinstance(e, str_type) else e - for e in exprs - ) - self.exprs = list(exprs) - else: - try: - self.exprs = list(exprs) - except TypeError: - self.exprs = [exprs] - self.callPreparse = False - - def recurse(self) -> List[ParserElement]: - return self.exprs[:] - - def append(self, other) -> ParserElement: - self.exprs.append(other) - self._defaultName = None - return self - - def leave_whitespace(self, recursive: bool = True) -> ParserElement: - """ - Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on - all contained expressions. - """ - super().leave_whitespace(recursive) - - if recursive: - self.exprs = [e.copy() for e in self.exprs] - for e in self.exprs: - e.leave_whitespace(recursive) - return self - - def ignore_whitespace(self, recursive: bool = True) -> ParserElement: - """ - Extends ``ignore_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on - all contained expressions. - """ - super().ignore_whitespace(recursive) - if recursive: - self.exprs = [e.copy() for e in self.exprs] - for e in self.exprs: - e.ignore_whitespace(recursive) - return self - - def ignore(self, other) -> ParserElement: - if isinstance(other, Suppress): - if other not in self.ignoreExprs: - super().ignore(other) - for e in self.exprs: - e.ignore(self.ignoreExprs[-1]) - else: - super().ignore(other) - for e in self.exprs: - e.ignore(self.ignoreExprs[-1]) - return self - - def _generateDefaultName(self) -> str: - return f"{type(self).__name__}:({self.exprs})" - - def streamline(self) -> ParserElement: - if self.streamlined: - return self - - super().streamline() - - for e in self.exprs: - e.streamline() - - # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)`` - # but only if there are no parse actions or resultsNames on the nested And's - # (likewise for :class:`Or`'s and :class:`MatchFirst`'s) - if len(self.exprs) == 2: - other = self.exprs[0] - if ( - isinstance(other, self.__class__) - and not other.parseAction - and other.resultsName is None - and not other.debug - ): - self.exprs = other.exprs[:] + [self.exprs[1]] - self._defaultName = None - self.mayReturnEmpty |= other.mayReturnEmpty - self.mayIndexError |= other.mayIndexError - - other = self.exprs[-1] - if ( - isinstance(other, self.__class__) - and not other.parseAction - and other.resultsName is None - and not other.debug - ): - self.exprs = self.exprs[:-1] + other.exprs[:] - self._defaultName = None - self.mayReturnEmpty |= other.mayReturnEmpty - self.mayIndexError |= other.mayIndexError - - self.errmsg = f"Expected {self}" - - return self - - def validate(self, validateTrace=None) -> None: - warnings.warn( - "ParserElement.validate() is deprecated, and should not be used to check for left recursion", - DeprecationWarning, - stacklevel=2, - ) - tmp = (validateTrace if validateTrace is not None else [])[:] + [self] - for e in self.exprs: - e.validate(tmp) - 
self._checkRecursion([]) - - def copy(self) -> ParserElement: - ret = super().copy() - ret = typing.cast(ParseExpression, ret) - ret.exprs = [e.copy() for e in self.exprs] - return ret - - def _setResultsName(self, name, listAllMatches=False): - if not ( - __diag__.warn_ungrouped_named_tokens_in_collection - and Diagnostics.warn_ungrouped_named_tokens_in_collection - not in self.suppress_warnings_ - ): - return super()._setResultsName(name, listAllMatches) - - for e in self.exprs: - if ( - isinstance(e, ParserElement) - and e.resultsName - and ( - Diagnostics.warn_ungrouped_named_tokens_in_collection - not in e.suppress_warnings_ - ) - ): - warning = ( - "warn_ungrouped_named_tokens_in_collection:" - f" setting results name {name!r} on {type(self).__name__} expression" - f" collides with {e.resultsName!r} on contained expression" - ) - warnings.warn(warning, stacklevel=3) - break - - return super()._setResultsName(name, listAllMatches) - - # Compatibility synonyms - # fmt: off - leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace) - ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace) - # fmt: on - - -class And(ParseExpression): - """ - Requires all given :class:`ParseExpression` s to be found in the given order. - Expressions may be separated by whitespace. - May be constructed using the ``'+'`` operator. - May also be constructed using the ``'-'`` operator, which will - suppress backtracking. - - Example:: - - integer = Word(nums) - name_expr = Word(alphas)[1, ...] - - expr = And([integer("id"), name_expr("name"), integer("age")]) - # more easily written as: - expr = integer("id") + name_expr("name") + integer("age") - """ - - class _ErrorStop(Empty): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.leave_whitespace() - - def _generateDefaultName(self) -> str: - return "-" - - def __init__( - self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True - ): - exprs: List[ParserElement] = list(exprs_arg) - if exprs and Ellipsis in exprs: - tmp = [] - for i, expr in enumerate(exprs): - if expr is not Ellipsis: - tmp.append(expr) - continue - - if i < len(exprs) - 1: - skipto_arg: ParserElement = typing.cast( - ParseExpression, (Empty() + exprs[i + 1]) - ).exprs[-1] - tmp.append(SkipTo(skipto_arg)("_skipped*")) - continue - - raise Exception("cannot construct And with sequence ending in ...") - exprs[:] = tmp - super().__init__(exprs, savelist) - if self.exprs: - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) - if not isinstance(self.exprs[0], White): - self.set_whitespace_chars( - self.exprs[0].whiteChars, - copy_defaults=self.exprs[0].copyDefaultWhiteChars, - ) - self.skipWhitespace = self.exprs[0].skipWhitespace - else: - self.skipWhitespace = False - else: - self.mayReturnEmpty = True - self.callPreparse = True - - def streamline(self) -> ParserElement: - # collapse any _PendingSkip's - if self.exprs and any( - isinstance(e, ParseExpression) - and e.exprs - and isinstance(e.exprs[-1], _PendingSkip) - for e in self.exprs[:-1] - ): - deleted_expr_marker = NoMatch() - for i, e in enumerate(self.exprs[:-1]): - if e is deleted_expr_marker: - continue - if ( - isinstance(e, ParseExpression) - and e.exprs - and isinstance(e.exprs[-1], _PendingSkip) - ): - e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1] - self.exprs[i + 1] = deleted_expr_marker - self.exprs = [e for e in self.exprs if e is not deleted_expr_marker] - - super().streamline() - - # link any IndentedBlocks to the prior expression - 
prev: ParserElement - cur: ParserElement - for prev, cur in zip(self.exprs, self.exprs[1:]): - # traverse cur or any first embedded expr of cur looking for an IndentedBlock - # (but watch out for recursive grammar) - seen = set() - while True: - if id(cur) in seen: - break - seen.add(id(cur)) - if isinstance(cur, IndentedBlock): - prev.add_parse_action( - lambda s, l, t, cur_=cur: setattr( - cur_, "parent_anchor", col(l, s) - ) - ) - break - subs = cur.recurse() - next_first = next(iter(subs), None) - if next_first is None: - break - cur = typing.cast(ParserElement, next_first) - - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) - return self - - def parseImpl(self, instring, loc, doActions=True): - # pass False as callPreParse arg to _parse for first element, since we already - # pre-parsed the string as part of our And pre-parsing - loc, resultlist = self.exprs[0]._parse( - instring, loc, doActions, callPreParse=False - ) - errorStop = False - for e in self.exprs[1:]: - # if isinstance(e, And._ErrorStop): - if type(e) is And._ErrorStop: - errorStop = True - continue - if errorStop: - try: - loc, exprtokens = e._parse(instring, loc, doActions) - except ParseSyntaxException: - raise - except ParseBaseException as pe: - pe.__traceback__ = None - raise ParseSyntaxException._from_exception(pe) - except IndexError: - raise ParseSyntaxException( - instring, len(instring), self.errmsg, self - ) - else: - loc, exprtokens = e._parse(instring, loc, doActions) - resultlist += exprtokens - return loc, resultlist - - def __iadd__(self, other): - if isinstance(other, str_type): - other = self._literalStringClass(other) - if not isinstance(other, ParserElement): - return NotImplemented - return self.append(other) # And([self, other]) - - def _checkRecursion(self, parseElementList): - subRecCheckList = parseElementList[:] + [self] - for e in self.exprs: - e._checkRecursion(subRecCheckList) - if not e.mayReturnEmpty: - break - - def _generateDefaultName(self) -> str: - inner = " ".join(str(e) for e in self.exprs) - # strip off redundant inner {}'s - while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}": - inner = inner[1:-1] - return f"{{{inner}}}" - - -class Or(ParseExpression): - """Requires that at least one :class:`ParseExpression` is found. If - two expressions match, the expression that matches the longest - string will be used. May be constructed using the ``'^'`` - operator. - - Example:: - - # construct Or using '^' operator - - number = Word(nums) ^ Combine(Word(nums) + '.' 
+ Word(nums)) - print(number.search_string("123 3.1416 789")) - - prints:: - - [['123'], ['3.1416'], ['789']] - """ - - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): - super().__init__(exprs, savelist) - if self.exprs: - self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) - self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) - else: - self.mayReturnEmpty = True - - def streamline(self) -> ParserElement: - super().streamline() - if self.exprs: - self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) - self.saveAsList = any(e.saveAsList for e in self.exprs) - self.skipWhitespace = all( - e.skipWhitespace and not isinstance(e, White) for e in self.exprs - ) - else: - self.saveAsList = False - return self - - def parseImpl(self, instring, loc, doActions=True): - maxExcLoc = -1 - maxException = None - matches = [] - fatals = [] - if all(e.callPreparse for e in self.exprs): - loc = self.preParse(instring, loc) - for e in self.exprs: - try: - loc2 = e.try_parse(instring, loc, raise_fatal=True) - except ParseFatalException as pfe: - pfe.__traceback__ = None - pfe.parser_element = e - fatals.append(pfe) - maxException = None - maxExcLoc = -1 - except ParseException as err: - if not fatals: - err.__traceback__ = None - if err.loc > maxExcLoc: - maxException = err - maxExcLoc = err.loc - except IndexError: - if len(instring) > maxExcLoc: - maxException = ParseException( - instring, len(instring), e.errmsg, self - ) - maxExcLoc = len(instring) - else: - # save match among all matches, to retry longest to shortest - matches.append((loc2, e)) - - if matches: - # re-evaluate all matches in descending order of length of match, in case attached actions - # might change whether or how much they match of the input. 
- matches.sort(key=itemgetter(0), reverse=True) - - if not doActions: - # no further conditions or parse actions to change the selection of - # alternative, so the first match will be the best match - best_expr = matches[0][1] - return best_expr._parse(instring, loc, doActions) - - longest = -1, None - for loc1, expr1 in matches: - if loc1 <= longest[0]: - # already have a longer match than this one will deliver, we are done - return longest - - try: - loc2, toks = expr1._parse(instring, loc, doActions) - except ParseException as err: - err.__traceback__ = None - if err.loc > maxExcLoc: - maxException = err - maxExcLoc = err.loc - else: - if loc2 >= loc1: - return loc2, toks - # didn't match as much as before - elif loc2 > longest[0]: - longest = loc2, toks - - if longest != (-1, None): - return longest - - if fatals: - if len(fatals) > 1: - fatals.sort(key=lambda e: -e.loc) - if fatals[0].loc == fatals[1].loc: - fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element)))) - max_fatal = fatals[0] - raise max_fatal - - if maxException is not None: - # infer from this check that all alternatives failed at the current position - # so emit this collective error message instead of any single error message - if maxExcLoc == loc: - maxException.msg = self.errmsg - raise maxException - - raise ParseException(instring, loc, "no defined alternatives to match", self) - - def __ixor__(self, other): - if isinstance(other, str_type): - other = self._literalStringClass(other) - if not isinstance(other, ParserElement): - return NotImplemented - return self.append(other) # Or([self, other]) - - def _generateDefaultName(self) -> str: - return f"{{{' ^ '.join(str(e) for e in self.exprs)}}}" - - def _setResultsName(self, name, listAllMatches=False): - if ( - __diag__.warn_multiple_tokens_in_named_alternation - and Diagnostics.warn_multiple_tokens_in_named_alternation - not in self.suppress_warnings_ - ): - if any( - isinstance(e, And) - and Diagnostics.warn_multiple_tokens_in_named_alternation - not in e.suppress_warnings_ - for e in self.exprs - ): - warning = ( - "warn_multiple_tokens_in_named_alternation:" - f" setting results name {name!r} on {type(self).__name__} expression" - " will return a list of all parsed tokens in an And alternative," - " in prior versions only the first token was returned; enclose" - " contained argument in Group" - ) - warnings.warn(warning, stacklevel=3) - - return super()._setResultsName(name, listAllMatches) - - -class MatchFirst(ParseExpression): - """Requires that at least one :class:`ParseExpression` is found. If - more than one expression matches, the first one listed is the one that will - match. May be constructed using the ``'|'`` operator. - - Example:: - - # construct MatchFirst using '|' operator - - # watch the order of expressions to match - number = Word(nums) | Combine(Word(nums) + '.' + Word(nums)) - print(number.search_string("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']] - - # put more selective expression first - number = Combine(Word(nums) + '.' 
+ Word(nums)) | Word(nums) - print(number.search_string("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] - """ - - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): - super().__init__(exprs, savelist) - if self.exprs: - self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) - self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) - else: - self.mayReturnEmpty = True - - def streamline(self) -> ParserElement: - if self.streamlined: - return self - - super().streamline() - if self.exprs: - self.saveAsList = any(e.saveAsList for e in self.exprs) - self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) - self.skipWhitespace = all( - e.skipWhitespace and not isinstance(e, White) for e in self.exprs - ) - else: - self.saveAsList = False - self.mayReturnEmpty = True - return self - - def parseImpl(self, instring, loc, doActions=True): - maxExcLoc = -1 - maxException = None - - for e in self.exprs: - try: - return e._parse(instring, loc, doActions) - except ParseFatalException as pfe: - pfe.__traceback__ = None - pfe.parser_element = e - raise - except ParseException as err: - if err.loc > maxExcLoc: - maxException = err - maxExcLoc = err.loc - except IndexError: - if len(instring) > maxExcLoc: - maxException = ParseException( - instring, len(instring), e.errmsg, self - ) - maxExcLoc = len(instring) - - if maxException is not None: - # infer from this check that all alternatives failed at the current position - # so emit this collective error message instead of any individual error message - if maxExcLoc == loc: - maxException.msg = self.errmsg - raise maxException - - raise ParseException(instring, loc, "no defined alternatives to match", self) - - def __ior__(self, other): - if isinstance(other, str_type): - other = self._literalStringClass(other) - if not isinstance(other, ParserElement): - return NotImplemented - return self.append(other) # MatchFirst([self, other]) - - def _generateDefaultName(self) -> str: - return f"{{{' | '.join(str(e) for e in self.exprs)}}}" - - def _setResultsName(self, name, listAllMatches=False): - if ( - __diag__.warn_multiple_tokens_in_named_alternation - and Diagnostics.warn_multiple_tokens_in_named_alternation - not in self.suppress_warnings_ - ): - if any( - isinstance(e, And) - and Diagnostics.warn_multiple_tokens_in_named_alternation - not in e.suppress_warnings_ - for e in self.exprs - ): - warning = ( - "warn_multiple_tokens_in_named_alternation:" - f" setting results name {name!r} on {type(self).__name__} expression" - " will return a list of all parsed tokens in an And alternative," - " in prior versions only the first token was returned; enclose" - " contained argument in Group" - ) - warnings.warn(warning, stacklevel=3) - - return super()._setResultsName(name, listAllMatches) - - -class Each(ParseExpression): - """Requires all given :class:`ParseExpression` s to be found, but in - any order. Expressions may be separated by whitespace. - - May be constructed using the ``'&'`` operator. 
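A smaller sketch than the ``run_tests`` example below (mine, not from the removed source, assuming pyparsing 3.x imported as ``pp``), showing order-independent matching with named results::

    import pyparsing as pp

    integer = pp.Word(pp.nums)
    spec = ("w:" + integer("w")) & ("h:" + integer("h"))
    result = spec.parse_string("h:20 w:10")  # order does not matter
    print(result["w"], result["h"])          # -> 10 20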
- - Example:: - - color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") - shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") - integer = Word(nums) - shape_attr = "shape:" + shape_type("shape") - posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") - color_attr = "color:" + color("color") - size_attr = "size:" + integer("size") - - # use Each (using operator '&') to accept attributes in any order - # (shape and posn are required, color and size are optional) - shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr) - - shape_spec.run_tests(''' - shape: SQUARE color: BLACK posn: 100, 120 - shape: CIRCLE size: 50 color: BLUE posn: 50,80 - color:GREEN size:20 shape:TRIANGLE posn:20,40 - ''' - ) - - prints:: - - shape: SQUARE color: BLACK posn: 100, 120 - ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] - - color: BLACK - - posn: ['100', ',', '120'] - - x: 100 - - y: 120 - - shape: SQUARE - - - shape: CIRCLE size: 50 color: BLUE posn: 50,80 - ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] - - color: BLUE - - posn: ['50', ',', '80'] - - x: 50 - - y: 80 - - shape: CIRCLE - - size: 50 - - - color: GREEN size: 20 shape: TRIANGLE posn: 20,40 - ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] - - color: GREEN - - posn: ['20', ',', '40'] - - x: 20 - - y: 40 - - shape: TRIANGLE - - size: 20 - """ - - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True): - super().__init__(exprs, savelist) - if self.exprs: - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) - else: - self.mayReturnEmpty = True - self.skipWhitespace = True - self.initExprGroups = True - self.saveAsList = True - - def __iand__(self, other): - if isinstance(other, str_type): - other = self._literalStringClass(other) - if not isinstance(other, ParserElement): - return NotImplemented - return self.append(other) # Each([self, other]) - - def streamline(self) -> ParserElement: - super().streamline() - if self.exprs: - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) - else: - self.mayReturnEmpty = True - return self - - def parseImpl(self, instring, loc, doActions=True): - if self.initExprGroups: - self.opt1map = dict( - (id(e.expr), e) for e in self.exprs if isinstance(e, Opt) - ) - opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)] - opt2 = [ - e - for e in self.exprs - if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore)) - ] - self.optionals = opt1 + opt2 - self.multioptionals = [ - e.expr.set_results_name(e.resultsName, list_all_matches=True) - for e in self.exprs - if isinstance(e, _MultipleMatch) - ] - self.multirequired = [ - e.expr.set_results_name(e.resultsName, list_all_matches=True) - for e in self.exprs - if isinstance(e, OneOrMore) - ] - self.required = [ - e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore)) - ] - self.required += self.multirequired - self.initExprGroups = False - - tmpLoc = loc - tmpReqd = self.required[:] - tmpOpt = self.optionals[:] - multis = self.multioptionals[:] - matchOrder = [] - - keepMatching = True - failed = [] - fatals = [] - while keepMatching: - tmpExprs = tmpReqd + tmpOpt + multis - failed.clear() - fatals.clear() - for e in tmpExprs: - try: - tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True) - except ParseFatalException as pfe: - pfe.__traceback__ = None - pfe.parser_element = e - fatals.append(pfe) - failed.append(e) - except 
ParseException: - failed.append(e) - else: - matchOrder.append(self.opt1map.get(id(e), e)) - if e in tmpReqd: - tmpReqd.remove(e) - elif e in tmpOpt: - tmpOpt.remove(e) - if len(failed) == len(tmpExprs): - keepMatching = False - - # look for any ParseFatalExceptions - if fatals: - if len(fatals) > 1: - fatals.sort(key=lambda e: -e.loc) - if fatals[0].loc == fatals[1].loc: - fatals.sort(key=lambda e: (-e.loc, -len(str(e.parser_element)))) - max_fatal = fatals[0] - raise max_fatal - - if tmpReqd: - missing = ", ".join([str(e) for e in tmpReqd]) - raise ParseException( - instring, - loc, - f"Missing one or more required elements ({missing})", - ) - - # add any unmatched Opts, in case they have default values defined - matchOrder += [e for e in self.exprs if isinstance(e, Opt) and e.expr in tmpOpt] - - total_results = ParseResults([]) - for e in matchOrder: - loc, results = e._parse(instring, loc, doActions) - total_results += results - - return loc, total_results - - def _generateDefaultName(self) -> str: - return f"{{{' & '.join(str(e) for e in self.exprs)}}}" - - -class ParseElementEnhance(ParserElement): - """Abstract subclass of :class:`ParserElement`, for combining and - post-processing parsed tokens. - """ - - def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): - super().__init__(savelist) - if isinstance(expr, str_type): - expr_str = typing.cast(str, expr) - if issubclass(self._literalStringClass, Token): - expr = self._literalStringClass(expr_str) # type: ignore[call-arg] - elif issubclass(type(self), self._literalStringClass): - expr = Literal(expr_str) - else: - expr = self._literalStringClass(Literal(expr_str)) # type: ignore[assignment, call-arg] - expr = typing.cast(ParserElement, expr) - self.expr = expr - if expr is not None: - self.mayIndexError = expr.mayIndexError - self.mayReturnEmpty = expr.mayReturnEmpty - self.set_whitespace_chars( - expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars - ) - self.skipWhitespace = expr.skipWhitespace - self.saveAsList = expr.saveAsList - self.callPreparse = expr.callPreparse - self.ignoreExprs.extend(expr.ignoreExprs) - - def recurse(self) -> List[ParserElement]: - return [self.expr] if self.expr is not None else [] - - def parseImpl(self, instring, loc, doActions=True): - if self.expr is None: - raise ParseException(instring, loc, "No expression defined", self) - - try: - return self.expr._parse(instring, loc, doActions, callPreParse=False) - except ParseBaseException as pbe: - if not isinstance(self, Forward) or self.customName is not None: - if self.errmsg: - pbe.msg = self.errmsg - raise - - def leave_whitespace(self, recursive: bool = True) -> ParserElement: - super().leave_whitespace(recursive) - - if recursive: - if self.expr is not None: - self.expr = self.expr.copy() - self.expr.leave_whitespace(recursive) - return self - - def ignore_whitespace(self, recursive: bool = True) -> ParserElement: - super().ignore_whitespace(recursive) - - if recursive: - if self.expr is not None: - self.expr = self.expr.copy() - self.expr.ignore_whitespace(recursive) - return self - - def ignore(self, other) -> ParserElement: - if not isinstance(other, Suppress) or other not in self.ignoreExprs: - super().ignore(other) - if self.expr is not None: - self.expr.ignore(self.ignoreExprs[-1]) - - return self - - def streamline(self) -> ParserElement: - super().streamline() - if self.expr is not None: - self.expr.streamline() - return self - - def _checkRecursion(self, parseElementList): - if self in parseElementList: - raise 
RecursiveGrammarException(parseElementList + [self]) - subRecCheckList = parseElementList[:] + [self] - if self.expr is not None: - self.expr._checkRecursion(subRecCheckList) - - def validate(self, validateTrace=None) -> None: - warnings.warn( - "ParserElement.validate() is deprecated, and should not be used to check for left recursion", - DeprecationWarning, - stacklevel=2, - ) - if validateTrace is None: - validateTrace = [] - tmp = validateTrace[:] + [self] - if self.expr is not None: - self.expr.validate(tmp) - self._checkRecursion([]) - - def _generateDefaultName(self) -> str: - return f"{type(self).__name__}:({self.expr})" - - # Compatibility synonyms - # fmt: off - leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace) - ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace) - # fmt: on - - -class IndentedBlock(ParseElementEnhance): - """ - Expression to match one or more expressions at a given indentation level. - Useful for parsing text where structure is implied by indentation (like Python source code). - """ - - class _Indent(Empty): - def __init__(self, ref_col: int): - super().__init__() - self.errmsg = f"expected indent at column {ref_col}" - self.add_condition(lambda s, l, t: col(l, s) == ref_col) - - class _IndentGreater(Empty): - def __init__(self, ref_col: int): - super().__init__() - self.errmsg = f"expected indent at column greater than {ref_col}" - self.add_condition(lambda s, l, t: col(l, s) > ref_col) - - def __init__( - self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True - ): - super().__init__(expr, savelist=True) - # if recursive: - # raise NotImplementedError("IndentedBlock with recursive is not implemented") - self._recursive = recursive - self._grouped = grouped - self.parent_anchor = 1 - - def parseImpl(self, instring, loc, doActions=True): - # advance parse position to non-whitespace by using an Empty() - # this should be the column to be used for all subsequent indented lines - anchor_loc = Empty().preParse(instring, loc) - - # see if self.expr matches at the current location - if not it will raise an exception - # and no further work is necessary - self.expr.try_parse(instring, anchor_loc, do_actions=doActions) - - indent_col = col(anchor_loc, instring) - peer_detect_expr = self._Indent(indent_col) - - inner_expr = Empty() + peer_detect_expr + self.expr - if self._recursive: - sub_indent = self._IndentGreater(indent_col) - nested_block = IndentedBlock( - self.expr, recursive=self._recursive, grouped=self._grouped - ) - nested_block.set_debug(self.debug) - nested_block.parent_anchor = indent_col - inner_expr += Opt(sub_indent + nested_block) - - inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}") - block = OneOrMore(inner_expr) - - trailing_undent = self._Indent(self.parent_anchor) | StringEnd() - - if self._grouped: - wrapper = Group - else: - wrapper = lambda expr: expr - return (wrapper(block) + Optional(trailing_undent)).parseImpl( - instring, anchor_loc, doActions - ) - - -class AtStringStart(ParseElementEnhance): - """Matches if expression matches at the beginning of the parse - string:: - - AtStringStart(Word(nums)).parse_string("123") - # prints ["123"] - - AtStringStart(Word(nums)).parse_string(" 123") - # raises ParseException - """ - - def __init__(self, expr: Union[ParserElement, str]): - super().__init__(expr) - self.callPreparse = False - - def parseImpl(self, instring, loc, doActions=True): - if loc != 0: - raise ParseException(instring, loc, "not found at 
string start") - return super().parseImpl(instring, loc, doActions) - - -class AtLineStart(ParseElementEnhance): - r"""Matches if an expression matches at the beginning of a line within - the parse string - - Example:: - - test = '''\ - AAA this line - AAA and this line - AAA but not this one - B AAA and definitely not this one - ''' - - for t in (AtLineStart('AAA') + rest_of_line).search_string(test): - print(t) - - prints:: - - ['AAA', ' this line'] - ['AAA', ' and this line'] - - """ - - def __init__(self, expr: Union[ParserElement, str]): - super().__init__(expr) - self.callPreparse = False - - def parseImpl(self, instring, loc, doActions=True): - if col(loc, instring) != 1: - raise ParseException(instring, loc, "not found at line start") - return super().parseImpl(instring, loc, doActions) - - -class FollowedBy(ParseElementEnhance): - """Lookahead matching of the given parse expression. - ``FollowedBy`` does *not* advance the parsing position within - the input string, it only verifies that the specified parse - expression matches at the current position. ``FollowedBy`` - always returns a null token list. If any results names are defined - in the lookahead expression, those *will* be returned for access by - name. - - Example:: - - # use FollowedBy to match a label only if it is followed by a ':' - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) - - attr_expr[1, ...].parse_string("shape: SQUARE color: BLACK posn: upper left").pprint() - - prints:: - - [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] - """ - - def __init__(self, expr: Union[ParserElement, str]): - super().__init__(expr) - self.mayReturnEmpty = True - - def parseImpl(self, instring, loc, doActions=True): - # by using self._expr.parse and deleting the contents of the returned ParseResults list - # we keep any named results that were defined in the FollowedBy expression - _, ret = self.expr._parse(instring, loc, doActions=doActions) - del ret[:] - - return loc, ret - - -class PrecededBy(ParseElementEnhance): - """Lookbehind matching of the given parse expression. - ``PrecededBy`` does not advance the parsing position within the - input string, it only verifies that the specified parse expression - matches prior to the current position. ``PrecededBy`` always - returns a null token list, but if a results name is defined on the - given expression, it is returned. - - Parameters: - - - ``expr`` - expression that must match prior to the current parse - location - - ``retreat`` - (default= ``None``) - (int) maximum number of characters - to lookbehind prior to the current parse location - - If the lookbehind expression is a string, :class:`Literal`, - :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn` - with a specified exact or maximum length, then the retreat - parameter is not required. Otherwise, retreat must be specified to - give a maximum number of characters to look back from - the current parse position for a lookbehind match. 
-
-    Example::
-
-        # VB-style variable names with type prefixes
-        int_var = PrecededBy("#") + pyparsing_common.identifier
-        str_var = PrecededBy("$") + pyparsing_common.identifier
-
-    """
-
-    def __init__(
-        self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None
-    ):
-        super().__init__(expr)
-        self.expr = self.expr().leave_whitespace()
-        self.mayReturnEmpty = True
-        self.mayIndexError = False
-        self.exact = False
-        if isinstance(expr, str_type):
-            expr = typing.cast(str, expr)
-            retreat = len(expr)
-            self.exact = True
-        elif isinstance(expr, (Literal, Keyword)):
-            retreat = expr.matchLen
-            self.exact = True
-        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
-            retreat = expr.maxLen
-            self.exact = True
-        elif isinstance(expr, PositionToken):
-            retreat = 0
-            self.exact = True
-        self.retreat = retreat
-        self.errmsg = f"not preceded by {expr}"
-        self.skipWhitespace = False
-        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))
-
-    def parseImpl(self, instring, loc=0, doActions=True):
-        if self.exact:
-            if loc < self.retreat:
-                raise ParseException(instring, loc, self.errmsg)
-            start = loc - self.retreat
-            _, ret = self.expr._parse(instring, start)
-            return loc, ret
-
-        # retreat specified a maximum lookbehind window, iterate
-        test_expr = self.expr + StringEnd()
-        instring_slice = instring[max(0, loc - self.retreat) : loc]
-        last_expr = ParseException(instring, loc, self.errmsg)
-
-        for offset in range(1, min(loc, self.retreat + 1) + 1):
-            try:
-                # print('trying', offset, instring_slice, repr(instring_slice[loc - offset:]))
-                _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset)
-            except ParseBaseException as pbe:
-                last_expr = pbe
-            else:
-                break
-        else:
-            raise last_expr
-
-        return loc, ret
-
-
-class Located(ParseElementEnhance):
-    """
-    Decorates a returned token with its starting and ending
-    locations in the input string.
-
-    This helper adds the following results names:
-
-    - ``locn_start`` - location where matched expression begins
-    - ``locn_end`` - location where matched expression ends
-    - ``value`` - the actual parsed results
-
-    Be careful if the input text contains ``<TAB>`` characters, you
-    may want to call :class:`ParserElement.parse_with_tabs`
-
-    Example::
-
-        wd = Word(alphas)
-        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
-            print(match)
-
-    prints::
-
-        [0, ['ljsdf'], 5]
-        [8, ['lksdjjf'], 15]
-        [18, ['lkkjj'], 23]
-
-    """
-
-    def parseImpl(self, instring, loc, doActions=True):
-        start = loc
-        loc, tokens = self.expr._parse(instring, start, doActions, callPreParse=False)
-        ret_tokens = ParseResults([start, tokens, loc])
-        ret_tokens["locn_start"] = start
-        ret_tokens["value"] = tokens
-        ret_tokens["locn_end"] = loc
-        if self.resultsName:
-            # must return as a list, so that the name will be attached to the complete group
-            return loc, [ret_tokens]
-        else:
-            return loc, ret_tokens
-
-
-class NotAny(ParseElementEnhance):
-    """
-    Lookahead to disallow matching with the given parse expression.
-    ``NotAny`` does *not* advance the parsing position within the
-    input string, it only verifies that the specified parse expression
-    does *not* match at the current position. Also, ``NotAny`` does
-    *not* skip over leading whitespace. ``NotAny`` always returns
-    a null token list. May be constructed using the ``'~'`` operator.
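# [Editor's note] Illustrative sketch, not part of the removed vendored file.
# '~expr' builds a NotAny: it consumes no input and simply fails whenever expr
# would match, which keeps a repetition from running through a terminator.
# Assumes the standalone `pyparsing` distribution from PyPI:
import pyparsing as pp

END = pp.Keyword("END")
body_word = ~END + pp.Word(pp.alphas)  # any word except the END keyword
print((body_word[1, ...] + END).parse_string("alpha beta END"))
# -> ['alpha', 'beta', 'END']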
- - Example:: - - AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split()) - - # take care not to mistake keywords for identifiers - ident = ~(AND | OR | NOT) + Word(alphas) - boolean_term = Opt(NOT) + ident - - # very crude boolean expression - to support parenthesis groups and - # operation hierarchy, use infix_notation - boolean_expr = boolean_term + ((AND | OR) + boolean_term)[...] - - # integers that are followed by "." are actually floats - integer = Word(nums) + ~Char(".") - """ - - def __init__(self, expr: Union[ParserElement, str]): - super().__init__(expr) - # do NOT use self.leave_whitespace(), don't want to propagate to exprs - # self.leave_whitespace() - self.skipWhitespace = False - - self.mayReturnEmpty = True - self.errmsg = f"Found unwanted token, {self.expr}" - - def parseImpl(self, instring, loc, doActions=True): - if self.expr.can_parse_next(instring, loc, do_actions=doActions): - raise ParseException(instring, loc, self.errmsg, self) - return loc, [] - - def _generateDefaultName(self) -> str: - return f"~{{{self.expr}}}" - - -class _MultipleMatch(ParseElementEnhance): - def __init__( - self, - expr: Union[str, ParserElement], - stop_on: typing.Optional[Union[ParserElement, str]] = None, - *, - stopOn: typing.Optional[Union[ParserElement, str]] = None, - ): - super().__init__(expr) - stopOn = stopOn or stop_on - self.saveAsList = True - ender = stopOn - if isinstance(ender, str_type): - ender = self._literalStringClass(ender) - self.stopOn(ender) - - def stopOn(self, ender) -> ParserElement: - if isinstance(ender, str_type): - ender = self._literalStringClass(ender) - self.not_ender = ~ender if ender is not None else None - return self - - def parseImpl(self, instring, loc, doActions=True): - self_expr_parse = self.expr._parse - self_skip_ignorables = self._skipIgnorables - check_ender = self.not_ender is not None - if check_ender: - try_not_ender = self.not_ender.try_parse - - # must be at least one (but first see if we are the stopOn sentinel; - # if so, fail) - if check_ender: - try_not_ender(instring, loc) - loc, tokens = self_expr_parse(instring, loc, doActions) - try: - hasIgnoreExprs = not not self.ignoreExprs - while 1: - if check_ender: - try_not_ender(instring, loc) - if hasIgnoreExprs: - preloc = self_skip_ignorables(instring, loc) - else: - preloc = loc - loc, tmptokens = self_expr_parse(instring, preloc, doActions) - tokens += tmptokens - except (ParseException, IndexError): - pass - - return loc, tokens - - def _setResultsName(self, name, listAllMatches=False): - if ( - __diag__.warn_ungrouped_named_tokens_in_collection - and Diagnostics.warn_ungrouped_named_tokens_in_collection - not in self.suppress_warnings_ - ): - for e in [self.expr] + self.expr.recurse(): - if ( - isinstance(e, ParserElement) - and e.resultsName - and ( - Diagnostics.warn_ungrouped_named_tokens_in_collection - not in e.suppress_warnings_ - ) - ): - warning = ( - "warn_ungrouped_named_tokens_in_collection:" - f" setting results name {name!r} on {type(self).__name__} expression" - f" collides with {e.resultsName!r} on contained expression" - ) - warnings.warn(warning, stacklevel=3) - break - - return super()._setResultsName(name, listAllMatches) - - -class OneOrMore(_MultipleMatch): - """ - Repetition of one or more of the given expression. 
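# [Editor's note] Illustrative sketch, not part of the removed vendored file.
# OneOrMore(expr) can also be spelled expr[1, ...], and ZeroOrMore(expr) as
# expr[...]; stop_on behaves the same in either spelling. Assumes the
# standalone `pyparsing` distribution from PyPI:
import pyparsing as pp

word = pp.Word(pp.alphas)
print(pp.OneOrMore(word).parse_string("a b c"))  # -> ['a', 'b', 'c']
print(word[1, ...].parse_string("a b c"))        # -> ['a', 'b', 'c']
print(word[...].parse_string(""))                # -> []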
- - Parameters: - - - ``expr`` - expression that must match one or more times - - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) - - Example:: - - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join)) - - text = "shape: SQUARE posn: upper left color: BLACK" - attr_expr[1, ...].parse_string(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']] - - # use stop_on attribute for OneOrMore to avoid reading label string as part of the data - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) - OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] - - # could also be written as - (attr_expr * (1,)).parse_string(text).pprint() - """ - - def _generateDefaultName(self) -> str: - return f"{{{self.expr}}}..." - - -class ZeroOrMore(_MultipleMatch): - """ - Optional repetition of zero or more of the given expression. - - Parameters: - - - ``expr`` - expression that must match zero or more times - - ``stop_on`` - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) - (default= ``None``) - - Example: similar to :class:`OneOrMore` - """ - - def __init__( - self, - expr: Union[str, ParserElement], - stop_on: typing.Optional[Union[ParserElement, str]] = None, - *, - stopOn: typing.Optional[Union[ParserElement, str]] = None, - ): - super().__init__(expr, stopOn=stopOn or stop_on) - self.mayReturnEmpty = True - - def parseImpl(self, instring, loc, doActions=True): - try: - return super().parseImpl(instring, loc, doActions) - except (ParseException, IndexError): - return loc, ParseResults([], name=self.resultsName) - - def _generateDefaultName(self) -> str: - return f"[{self.expr}]..." - - -class DelimitedList(ParseElementEnhance): - def __init__( - self, - expr: Union[str, ParserElement], - delim: Union[str, ParserElement] = ",", - combine: bool = False, - min: typing.Optional[int] = None, - max: typing.Optional[int] = None, - *, - allow_trailing_delim: bool = False, - ): - """Helper to define a delimited list of expressions - the delimiter - defaults to ','. By default, the list elements and delimiters can - have intervening whitespace, and comments, but this can be - overridden by passing ``combine=True`` in the constructor. If - ``combine`` is set to ``True``, the matching tokens are - returned as a single token string, with the delimiters included; - otherwise, the matching tokens are returned as a list of tokens, - with the delimiters suppressed. - - If ``allow_trailing_delim`` is set to True, then the list may end with - a delimiter. 
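# [Editor's note] Illustrative sketch, not part of the removed vendored file.
# The min/max arguments bound how many elements a DelimitedList may match.
# Assumes the standalone `pyparsing` distribution from PyPI:
import pyparsing as pp

rgb = pp.DelimitedList(pp.Word(pp.nums), min=3, max=3)
print(rgb.parse_string("255, 128, 0"))  # -> ['255', '128', '0']
try:
    rgb.parse_string("255, 128")        # too few elements
except pp.ParseException as pe:
    print("failed:", pe)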
- - Example:: - - DelimitedList(Word(alphas)).parse_string("aa,bb,cc") # -> ['aa', 'bb', 'cc'] - DelimitedList(Word(hexnums), delim=':', combine=True).parse_string("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] - """ - if isinstance(expr, str_type): - expr = ParserElement._literalStringClass(expr) - expr = typing.cast(ParserElement, expr) - - if min is not None and min < 1: - raise ValueError("min must be greater than 0") - - if max is not None and min is not None and max < min: - raise ValueError("max must be greater than, or equal to min") - - self.content = expr - self.raw_delim = str(delim) - self.delim = delim - self.combine = combine - if not combine: - self.delim = Suppress(delim) - self.min = min or 1 - self.max = max - self.allow_trailing_delim = allow_trailing_delim - - delim_list_expr = self.content + (self.delim + self.content) * ( - self.min - 1, - None if self.max is None else self.max - 1, - ) - if self.allow_trailing_delim: - delim_list_expr += Opt(self.delim) - - if self.combine: - delim_list_expr = Combine(delim_list_expr) - - super().__init__(delim_list_expr, savelist=True) - - def _generateDefaultName(self) -> str: - content_expr = self.content.streamline() - return f"{content_expr} [{self.raw_delim} {content_expr}]..." - - -class _NullToken: - def __bool__(self): - return False - - def __str__(self): - return "" - - -class Opt(ParseElementEnhance): - """ - Optional matching of the given expression. - - Parameters: - - - ``expr`` - expression that must match zero or more times - - ``default`` (optional) - value to be returned if the optional expression is not found. - - Example:: - - # US postal code can be a 5-digit zip, plus optional 4-digit qualifier - zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4))) - zip.run_tests(''' - # traditional ZIP code - 12345 - - # ZIP+4 form - 12101-0001 - - # invalid ZIP - 98765- - ''') - - prints:: - - # traditional ZIP code - 12345 - ['12345'] - - # ZIP+4 form - 12101-0001 - ['12101-0001'] - - # invalid ZIP - 98765- - ^ - FAIL: Expected end of text (at char 5), (line:1, col:6) - """ - - __optionalNotMatched = _NullToken() - - def __init__( - self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched - ): - super().__init__(expr, savelist=False) - self.saveAsList = self.expr.saveAsList - self.defaultValue = default - self.mayReturnEmpty = True - - def parseImpl(self, instring, loc, doActions=True): - self_expr = self.expr - try: - loc, tokens = self_expr._parse(instring, loc, doActions, callPreParse=False) - except (ParseException, IndexError): - default_value = self.defaultValue - if default_value is not self.__optionalNotMatched: - if self_expr.resultsName: - tokens = ParseResults([default_value]) - tokens[self_expr.resultsName] = default_value - else: - tokens = [default_value] - else: - tokens = [] - return loc, tokens - - def _generateDefaultName(self) -> str: - inner = str(self.expr) - # strip off redundant inner {}'s - while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}": - inner = inner[1:-1] - return f"[{inner}]" - - -Optional = Opt - - -class SkipTo(ParseElementEnhance): - """ - Token for skipping over all undefined text until the matched - expression is found. - - Parameters: - - - ``expr`` - target expression marking the end of the data to be skipped - - ``include`` - if ``True``, the target expression is also parsed - (the skipped text and target expression are returned as a 2-element - list) (default= ``False``). 
-
-    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
-      comments) that might contain false matches to the target expression
-    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
-      included in the skipped test; if found before the target expression is found,
-      the :class:`SkipTo` is not a match
-
-    Example::
-
-        report = '''
-            Outstanding Issues Report - 1 Jan 2000
-
-               # | Severity | Description                               |  Days Open
-            -----+----------+-------------------------------------------+-----------
-             101 | Critical | Intermittent system crash                 |          6
-              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
-              79 | Minor    | System slow when running too many reports |         47
-            '''
-        integer = Word(nums)
-        SEP = Suppress('|')
-        # use SkipTo to simply match everything up until the next SEP
-        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
-        # - parse action will call token.strip() for each matched token, i.e., the description body
-        string_data = SkipTo(SEP, ignore=quoted_string)
-        string_data.set_parse_action(token_map(str.strip))
-        ticket_expr = (integer("issue_num") + SEP
-                       + string_data("sev") + SEP
-                       + string_data("desc") + SEP
-                       + integer("days_open"))
-
-        for tkt in ticket_expr.search_string(report):
-            print(tkt.dump())
-
-    prints::
-
-        ['101', 'Critical', 'Intermittent system crash', '6']
-        - days_open: '6'
-        - desc: 'Intermittent system crash'
-        - issue_num: '101'
-        - sev: 'Critical'
-        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
-        - days_open: '14'
-        - desc: "Spelling error on Login ('log|n')"
-        - issue_num: '94'
-        - sev: 'Cosmetic'
-        ['79', 'Minor', 'System slow when running too many reports', '47']
-        - days_open: '47'
-        - desc: 'System slow when running too many reports'
-        - issue_num: '79'
-        - sev: 'Minor'
-    """
-
-    def __init__(
-        self,
-        other: Union[ParserElement, str],
-        include: bool = False,
-        ignore: typing.Optional[Union[ParserElement, str]] = None,
-        fail_on: typing.Optional[Union[ParserElement, str]] = None,
-        *,
-        failOn: typing.Optional[Union[ParserElement, str]] = None,
-    ):
-        super().__init__(other)
-        failOn = failOn or fail_on
-        self.ignoreExpr = ignore
-        self.mayReturnEmpty = True
-        self.mayIndexError = False
-        self.includeMatch = include
-        self.saveAsList = False
-        if isinstance(failOn, str_type):
-            self.failOn = self._literalStringClass(failOn)
-        else:
-            self.failOn = failOn
-        self.errmsg = "No match found for " + str(self.expr)
-        self.ignorer = Empty().leave_whitespace()
-        self._update_ignorer()
-
-    def _update_ignorer(self):
-        # rebuild internal ignore expr from current ignore exprs and assigned ignoreExpr
-        self.ignorer.ignoreExprs.clear()
-        for e in self.expr.ignoreExprs:
-            self.ignorer.ignore(e)
-        if self.ignoreExpr:
-            self.ignorer.ignore(self.ignoreExpr)
-
-    def ignore(self, expr):
-        super().ignore(expr)
-        self._update_ignorer()
-
-    def parseImpl(self, instring, loc, doActions=True):
-        startloc = loc
-        instrlen = len(instring)
-        self_expr_parse = self.expr._parse
-        self_failOn_canParseNext = (
-            self.failOn.canParseNext if self.failOn is not None else None
-        )
-        ignorer_try_parse = self.ignorer.try_parse if self.ignorer.ignoreExprs else None
-
-        tmploc = loc
-        while tmploc <= instrlen:
-            if self_failOn_canParseNext is not None:
-                # break if failOn expression matches
-                if self_failOn_canParseNext(instring, tmploc):
-                    break
-
-            if ignorer_try_parse is not None:
-                # advance past ignore expressions
-                prev_tmploc = tmploc
-                while 1:
-                    try:
-                        tmploc = 
ignorer_try_parse(instring, tmploc) - except ParseBaseException: - break - # see if all ignorers matched, but didn't actually ignore anything - if tmploc == prev_tmploc: - break - prev_tmploc = tmploc - - try: - self_expr_parse(instring, tmploc, doActions=False, callPreParse=False) - except (ParseException, IndexError): - # no match, advance loc in string - tmploc += 1 - else: - # matched skipto expr, done - break - - else: - # ran off the end of the input string without matching skipto expr, fail - raise ParseException(instring, loc, self.errmsg, self) - - # build up return values - loc = tmploc - skiptext = instring[startloc:loc] - skipresult = ParseResults(skiptext) - - if self.includeMatch: - loc, mat = self_expr_parse(instring, loc, doActions, callPreParse=False) - skipresult += mat - - return loc, skipresult - - -class Forward(ParseElementEnhance): - """ - Forward declaration of an expression to be defined later - - used for recursive grammars, such as algebraic infix notation. - When the expression is known, it is assigned to the ``Forward`` - variable using the ``'<<'`` operator. - - Note: take care when assigning to ``Forward`` not to overlook - precedence of operators. - - Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that:: - - fwd_expr << a | b | c - - will actually be evaluated as:: - - (fwd_expr << a) | b | c - - thereby leaving b and c out as parseable alternatives. It is recommended that you - explicitly group the values inserted into the ``Forward``:: - - fwd_expr << (a | b | c) - - Converting to use the ``'<<='`` operator instead will avoid this problem. - - See :class:`ParseResults.pprint` for an example of a recursive - parser created using ``Forward``. - """ - - def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None): - self.caller_frame = traceback.extract_stack(limit=2)[0] - super().__init__(other, savelist=False) # type: ignore[arg-type] - self.lshift_line = None - - def __lshift__(self, other) -> "Forward": - if hasattr(self, "caller_frame"): - del self.caller_frame - if isinstance(other, str_type): - other = self._literalStringClass(other) - - if not isinstance(other, ParserElement): - return NotImplemented - - self.expr = other - self.streamlined = other.streamlined - self.mayIndexError = self.expr.mayIndexError - self.mayReturnEmpty = self.expr.mayReturnEmpty - self.set_whitespace_chars( - self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars - ) - self.skipWhitespace = self.expr.skipWhitespace - self.saveAsList = self.expr.saveAsList - self.ignoreExprs.extend(self.expr.ignoreExprs) - self.lshift_line = traceback.extract_stack(limit=2)[-2] # type: ignore[assignment] - return self - - def __ilshift__(self, other) -> "Forward": - if not isinstance(other, ParserElement): - return NotImplemented - - return self << other - - def __or__(self, other) -> "ParserElement": - caller_line = traceback.extract_stack(limit=2)[-2] - if ( - __diag__.warn_on_match_first_with_lshift_operator - and caller_line == self.lshift_line - and Diagnostics.warn_on_match_first_with_lshift_operator - not in self.suppress_warnings_ - ): - warnings.warn( - "using '<<' operator with '|' is probably an error, use '<<='", - stacklevel=2, - ) - ret = super().__or__(other) - return ret - - def __del__(self): - # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<' - if ( - self.expr is None - and __diag__.warn_on_assignment_to_Forward - and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_ 
- ): - warnings.warn_explicit( - "Forward defined here but no expression attached later using '<<=' or '<<'", - UserWarning, - filename=self.caller_frame.filename, - lineno=self.caller_frame.lineno, - ) - - def parseImpl(self, instring, loc, doActions=True): - if ( - self.expr is None - and __diag__.warn_on_parse_using_empty_Forward - and Diagnostics.warn_on_parse_using_empty_Forward - not in self.suppress_warnings_ - ): - # walk stack until parse_string, scan_string, search_string, or transform_string is found - parse_fns = ( - "parse_string", - "scan_string", - "search_string", - "transform_string", - ) - tb = traceback.extract_stack(limit=200) - for i, frm in enumerate(reversed(tb), start=1): - if frm.name in parse_fns: - stacklevel = i + 1 - break - else: - stacklevel = 2 - warnings.warn( - "Forward expression was never assigned a value, will not parse any input", - stacklevel=stacklevel, - ) - if not ParserElement._left_recursion_enabled: - return super().parseImpl(instring, loc, doActions) - # ## Bounded Recursion algorithm ## - # Recursion only needs to be processed at ``Forward`` elements, since they are - # the only ones that can actually refer to themselves. The general idea is - # to handle recursion stepwise: We start at no recursion, then recurse once, - # recurse twice, ..., until more recursion offers no benefit (we hit the bound). - # - # The "trick" here is that each ``Forward`` gets evaluated in two contexts - # - to *match* a specific recursion level, and - # - to *search* the bounded recursion level - # and the two run concurrently. The *search* must *match* each recursion level - # to find the best possible match. This is handled by a memo table, which - # provides the previous match to the next level match attempt. - # - # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al. - # - # There is a complication since we not only *parse* but also *transform* via - # actions: We do not want to run the actions too often while expanding. Thus, - # we expand using `doActions=False` and only run `doActions=True` if the next - # recursion level is acceptable. - with ParserElement.recursion_lock: - memo = ParserElement.recursion_memos - try: - # we are parsing at a specific recursion expansion - use it as-is - prev_loc, prev_result = memo[loc, self, doActions] - if isinstance(prev_result, Exception): - raise prev_result - return prev_loc, prev_result.copy() - except KeyError: - act_key = (loc, self, True) - peek_key = (loc, self, False) - # we are searching for the best recursion expansion - keep on improving - # both `doActions` cases must be tracked separately here! 
- prev_loc, prev_peek = memo[peek_key] = ( - loc - 1, - ParseException( - instring, loc, "Forward recursion without base case", self - ), - ) - if doActions: - memo[act_key] = memo[peek_key] - while True: - try: - new_loc, new_peek = super().parseImpl(instring, loc, False) - except ParseException: - # we failed before getting any match – do not hide the error - if isinstance(prev_peek, Exception): - raise - new_loc, new_peek = prev_loc, prev_peek - # the match did not get better: we are done - if new_loc <= prev_loc: - if doActions: - # replace the match for doActions=False as well, - # in case the action did backtrack - prev_loc, prev_result = memo[peek_key] = memo[act_key] - del memo[peek_key], memo[act_key] - return prev_loc, prev_result.copy() - del memo[peek_key] - return prev_loc, prev_peek.copy() - # the match did get better: see if we can improve further - if doActions: - try: - memo[act_key] = super().parseImpl(instring, loc, True) - except ParseException as e: - memo[peek_key] = memo[act_key] = (new_loc, e) - raise - prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek - - def leave_whitespace(self, recursive: bool = True) -> ParserElement: - self.skipWhitespace = False - return self - - def ignore_whitespace(self, recursive: bool = True) -> ParserElement: - self.skipWhitespace = True - return self - - def streamline(self) -> ParserElement: - if not self.streamlined: - self.streamlined = True - if self.expr is not None: - self.expr.streamline() - return self - - def validate(self, validateTrace=None) -> None: - warnings.warn( - "ParserElement.validate() is deprecated, and should not be used to check for left recursion", - DeprecationWarning, - stacklevel=2, - ) - if validateTrace is None: - validateTrace = [] - - if self not in validateTrace: - tmp = validateTrace[:] + [self] - if self.expr is not None: - self.expr.validate(tmp) - self._checkRecursion([]) - - def _generateDefaultName(self) -> str: - # Avoid infinite recursion by setting a temporary _defaultName - self._defaultName = ": ..." - - # Use the string representation of main expression. - retString = "..." - try: - if self.expr is not None: - retString = str(self.expr)[:1000] - else: - retString = "None" - finally: - return f"{type(self).__name__}: {retString}" - - def copy(self) -> ParserElement: - if self.expr is not None: - return super().copy() - else: - ret = Forward() - ret <<= self - return ret - - def _setResultsName(self, name, list_all_matches=False): - # fmt: off - if ( - __diag__.warn_name_set_on_empty_Forward - and Diagnostics.warn_name_set_on_empty_Forward not in self.suppress_warnings_ - and self.expr is None - ): - warning = ( - "warn_name_set_on_empty_Forward:" - f" setting results name {name!r} on {type(self).__name__} expression" - " that has no contained expression" - ) - warnings.warn(warning, stacklevel=3) - # fmt: on - - return super()._setResultsName(name, list_all_matches) - - # Compatibility synonyms - # fmt: off - leaveWhitespace = replaced_by_pep8("leaveWhitespace", leave_whitespace) - ignoreWhitespace = replaced_by_pep8("ignoreWhitespace", ignore_whitespace) - # fmt: on - - -class TokenConverter(ParseElementEnhance): - """ - Abstract subclass of :class:`ParseExpression`, for converting parsed results. - """ - - def __init__(self, expr: Union[ParserElement, str], savelist=False): - super().__init__(expr) # , savelist) - self.saveAsList = False - - -class Combine(TokenConverter): - """Converter to concatenate all matching tokens to a single string. 
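# [Editor's note] Illustrative sketch, not part of the removed vendored file.
# A Forward placeholder (implemented above) lets a grammar refer to itself;
# note '<<=' rather than '<<' to avoid the precedence trap described in the
# Forward docstring. Assumes the standalone `pyparsing` distribution from PyPI:
import pyparsing as pp

LPAR, RPAR = map(pp.Suppress, "()")
value = pp.Forward()
value <<= pp.Word(pp.nums) | pp.Group(LPAR + value[...] + RPAR)
print(value.parse_string("(1 (2 3) 4)"))  # -> [['1', ['2', '3'], '4']]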
- By default, the matching patterns must also be contiguous in the - input string; this can be disabled by specifying - ``'adjacent=False'`` in the constructor. - - Example:: - - real = Word(nums) + '.' + Word(nums) - print(real.parse_string('3.1416')) # -> ['3', '.', '1416'] - # will also erroneously match the following - print(real.parse_string('3. 1416')) # -> ['3', '.', '1416'] - - real = Combine(Word(nums) + '.' + Word(nums)) - print(real.parse_string('3.1416')) # -> ['3.1416'] - # no match when there are internal spaces - print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...) - """ - - def __init__( - self, - expr: ParserElement, - join_string: str = "", - adjacent: bool = True, - *, - joinString: typing.Optional[str] = None, - ): - super().__init__(expr) - joinString = joinString if joinString is not None else join_string - # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself - if adjacent: - self.leave_whitespace() - self.adjacent = adjacent - self.skipWhitespace = True - self.joinString = joinString - self.callPreparse = True - - def ignore(self, other) -> ParserElement: - if self.adjacent: - ParserElement.ignore(self, other) - else: - super().ignore(other) - return self - - def postParse(self, instring, loc, tokenlist): - retToks = tokenlist.copy() - del retToks[:] - retToks += ParseResults( - ["".join(tokenlist._asStringList(self.joinString))], modal=self.modalResults - ) - - if self.resultsName and retToks.haskeys(): - return [retToks] - else: - return retToks - - -class Group(TokenConverter): - """Converter to return the matched tokens as a list - useful for - returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions. - - The optional ``aslist`` argument when set to True will return the - parsed tokens as a Python list instead of a pyparsing ParseResults. - - Example:: - - ident = Word(alphas) - num = Word(nums) - term = ident | num - func = ident + Opt(DelimitedList(term)) - print(func.parse_string("fn a, b, 100")) - # -> ['fn', 'a', 'b', '100'] - - func = ident + Group(Opt(DelimitedList(term))) - print(func.parse_string("fn a, b, 100")) - # -> ['fn', ['a', 'b', '100']] - """ - - def __init__(self, expr: ParserElement, aslist: bool = False): - super().__init__(expr) - self.saveAsList = True - self._asPythonList = aslist - - def postParse(self, instring, loc, tokenlist): - if self._asPythonList: - return ParseResults.List( - tokenlist.asList() - if isinstance(tokenlist, ParseResults) - else list(tokenlist) - ) - - return [tokenlist] - - -class Dict(TokenConverter): - """Converter to return a repetitive expression as a list, but also - as a dictionary. Each element can also be referenced using the first - token in the expression as its key. Useful for tabular report - scraping when the first column can be used as a item key. - - The optional ``asdict`` argument when set to True will return the - parsed tokens as a Python dict instead of a pyparsing ParseResults. 
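# [Editor's note] Illustrative sketch, not part of the removed vendored file.
# With aslist=True, the Group converter above hands back a plain Python list
# instead of a nested ParseResults. Assumes the standalone `pyparsing`
# distribution from PyPI:
import pyparsing as pp

nums_group = pp.Group(pp.Word(pp.nums)[1, ...], aslist=True)
result = nums_group.parse_string("1 2 3")
print(result[0], type(result[0]))  # -> ['1', '2', '3'] <class 'list'>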
-
-    Example::
-
-        data_word = Word(alphas)
-        label = data_word + FollowedBy(':')
-
-        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
-        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
-
-        # print attributes as plain groups
-        print(attr_expr[1, ...].parse_string(text).dump())
-
-        # instead of OneOrMore(expr), parse using Dict(Group(expr)[1, ...]) - Dict will auto-assign names
-        result = Dict(Group(attr_expr)[1, ...]).parse_string(text)
-        print(result.dump())
-
-        # access named fields as dict entries, or output as dict
-        print(result['shape'])
-        print(result.as_dict())
-
-    prints::
-
-        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
-        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
-        - color: 'light blue'
-        - posn: 'upper left'
-        - shape: 'SQUARE'
-        - texture: 'burlap'
-        SQUARE
-        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
-
-    See more examples at :class:`ParseResults` of accessing fields by results name.
-    """
-
-    def __init__(self, expr: ParserElement, asdict: bool = False):
-        super().__init__(expr)
-        self.saveAsList = True
-        self._asPythonDict = asdict
-
-    def postParse(self, instring, loc, tokenlist):
-        for i, tok in enumerate(tokenlist):
-            if len(tok) == 0:
-                continue
-
-            ikey = tok[0]
-            if isinstance(ikey, int):
-                ikey = str(ikey).strip()
-
-            if len(tok) == 1:
-                tokenlist[ikey] = _ParseResultsWithOffset("", i)
-
-            elif len(tok) == 2 and not isinstance(tok[1], ParseResults):
-                tokenlist[ikey] = _ParseResultsWithOffset(tok[1], i)
-
-            else:
-                try:
-                    dictvalue = tok.copy()  # ParseResults(i)
-                except Exception:
-                    exc = TypeError(
-                        "could not extract dict values from parsed results"
-                        " - Dict expression must contain Grouped expressions"
-                    )
-                    raise exc from None
-
-                del dictvalue[0]
-
-                if len(dictvalue) != 1 or (
-                    isinstance(dictvalue, ParseResults) and dictvalue.haskeys()
-                ):
-                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue, i)
-                else:
-                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0], i)
-
-        if self._asPythonDict:
-            return [tokenlist.as_dict()] if self.resultsName else tokenlist.as_dict()
-
-        return [tokenlist] if self.resultsName else tokenlist
-
-
-class Suppress(TokenConverter):
-    """Converter for ignoring the results of a parsed expression.
-
-    Example::
-
-        source = "a, b, c,d"
-        wd = Word(alphas)
-        wd_list1 = wd + (',' + wd)[...]
-        print(wd_list1.parse_string(source))
-
-        # often, delimiters that are useful during parsing are just in the
-        # way afterward - use Suppress to keep them out of the parsed output
-        wd_list2 = wd + (Suppress(',') + wd)[...]
-        print(wd_list2.parse_string(source))
-
-        # Skipped text (using '...') can be suppressed as well
-        source = "lead in START relevant text END trailing text"
-        start_marker = Keyword("START")
-        end_marker = Keyword("END")
-        find_body = Suppress(...) + start_marker + ... + end_marker
-        print(find_body.parse_string(source))
-
-    prints::
-
-        ['a', ',', 'b', ',', 'c', ',', 'd']
-        ['a', 'b', 'c', 'd']
-        ['START', 'relevant text ', 'END']
-
-    (See also :class:`DelimitedList`.)
- """ - - def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): - if expr is ...: - expr = _PendingSkip(NoMatch()) - super().__init__(expr) - - def __add__(self, other) -> "ParserElement": - if isinstance(self.expr, _PendingSkip): - return Suppress(SkipTo(other)) + other - - return super().__add__(other) - - def __sub__(self, other) -> "ParserElement": - if isinstance(self.expr, _PendingSkip): - return Suppress(SkipTo(other)) - other - - return super().__sub__(other) - - def postParse(self, instring, loc, tokenlist): - return [] - - def suppress(self) -> ParserElement: - return self - - -def trace_parse_action(f: ParseAction) -> ParseAction: - """Decorator for debugging parse actions. - - When the parse action is called, this decorator will print - ``">> entering method-name(line:, , )"``. - When the parse action completes, the decorator will print - ``"<<"`` followed by the returned value, or any exception that the parse action raised. - - Example:: - - wd = Word(alphas) - - @trace_parse_action - def remove_duplicate_chars(tokens): - return ''.join(sorted(set(''.join(tokens)))) - - wds = wd[1, ...].set_parse_action(remove_duplicate_chars) - print(wds.parse_string("slkdjs sld sldd sdlf sdljf")) - - prints:: - - >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {})) - < 3: - thisFunc = f"{type(paArgs[0]).__name__}.{thisFunc}" - sys.stderr.write(f">>entering {thisFunc}(line: {line(l, s)!r}, {l}, {t!r})\n") - try: - ret = f(*paArgs) - except Exception as exc: - sys.stderr.write(f"< str: - r"""Helper to easily define string ranges for use in :class:`Word` - construction. Borrows syntax from regexp ``'[]'`` string range - definitions:: - - srange("[0-9]") -> "0123456789" - srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" - srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" - - The input string must be enclosed in []'s, and the returned string - is the expanded character set joined into a single string. The - values enclosed in the []'s may be: - - - a single character - - an escaped character with a leading backslash (such as ``\-`` - or ``\]``) - - an escaped hex character with a leading ``'\x'`` - (``\x21``, which is a ``'!'`` character) (``\0x##`` - is also supported for backwards compatibility) - - an escaped octal character with a leading ``'\0'`` - (``\041``, which is a ``'!'`` character) - - a range of any of the above, separated by a dash (``'a-z'``, - etc.) - - any combination of the above (``'aeiouy'``, - ``'a-zA-Z0-9_$'``, etc.) - """ - _expanded = lambda p: ( - p - if not isinstance(p, ParseResults) - else "".join(chr(c) for c in range(ord(p[0]), ord(p[1]) + 1)) - ) - try: - return "".join(_expanded(part) for part in _reBracketExpr.parse_string(s).body) - except Exception as e: - return "" - - -def token_map(func, *args) -> ParseAction: - """Helper to define a parse action by mapping a function to all - elements of a :class:`ParseResults` list. If any additional args are passed, - they are forwarded to the given function as additional arguments - after the token, as in - ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``, - which will convert the parsed data to an integer using base 16. 
- - Example (compare the last to example in :class:`ParserElement.transform_string`:: - - hex_ints = Word(hexnums)[1, ...].set_parse_action(token_map(int, 16)) - hex_ints.run_tests(''' - 00 11 22 aa FF 0a 0d 1a - ''') - - upperword = Word(alphas).set_parse_action(token_map(str.upper)) - upperword[1, ...].run_tests(''' - my kingdom for a horse - ''') - - wd = Word(alphas).set_parse_action(token_map(str.title)) - wd[1, ...].set_parse_action(' '.join).run_tests(''' - now is the winter of our discontent made glorious summer by this sun of york - ''') - - prints:: - - 00 11 22 aa FF 0a 0d 1a - [0, 17, 34, 170, 255, 10, 13, 26] - - my kingdom for a horse - ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE'] - - now is the winter of our discontent made glorious summer by this sun of york - ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] - """ - - def pa(s, l, t): - return [func(tokn, *args) for tokn in t] - - func_name = getattr(func, "__name__", getattr(func, "__class__").__name__) - pa.__name__ = func_name - - return pa - - -def autoname_elements() -> None: - """ - Utility to simplify mass-naming of parser elements, for - generating railroad diagram with named subdiagrams. - """ - calling_frame = sys._getframe().f_back - if calling_frame is None: - return - calling_frame = typing.cast(types.FrameType, calling_frame) - for name, var in calling_frame.f_locals.items(): - if isinstance(var, ParserElement) and not var.customName: - var.set_name(name) - - -dbl_quoted_string = Combine( - Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' -).set_name("string enclosed in double quotes") - -sgl_quoted_string = Combine( - Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" -).set_name("string enclosed in single quotes") - -quoted_string = Combine( - (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name( - "double quoted string" - ) - | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name( - "single quoted string" - ) -).set_name("quoted string using single or double quotes") - -python_quoted_string = Combine( - (Regex(r'"""(?:[^"\\]|""(?!")|"(?!"")|\\.)*', flags=re.MULTILINE) + '"""').set_name( - "multiline double quoted string" - ) - ^ ( - Regex(r"'''(?:[^'\\]|''(?!')|'(?!'')|\\.)*", flags=re.MULTILINE) + "'''" - ).set_name("multiline single quoted string") - ^ (Regex(r'"(?:[^"\n\r\\]|(?:\\")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').set_name( - "double quoted string" - ) - ^ (Regex(r"'(?:[^'\n\r\\]|(?:\\')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").set_name( - "single quoted string" - ) -).set_name("Python quoted string") - -unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal") - - -alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") -punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") - -# build list of built-in expressions, for future reference if a global default value -# gets updated -_builtin_exprs: List[ParserElement] = [ - v for v in vars().values() if isinstance(v, ParserElement) -] - -# backward compatibility names -# fmt: off -sglQuotedString = sgl_quoted_string -dblQuotedString = dbl_quoted_string -quotedString = quoted_string -unicodeString = unicode_string -lineStart = line_start -lineEnd = line_end -stringStart = string_start -stringEnd = string_end -nullDebugAction = replaced_by_pep8("nullDebugAction", null_debug_action) -traceParseAction = replaced_by_pep8("traceParseAction", trace_parse_action) -conditionAsParseAction = 
replaced_by_pep8("conditionAsParseAction", condition_as_parse_action) -tokenMap = replaced_by_pep8("tokenMap", token_map) -# fmt: on diff --git a/src/pip/_vendor/pyparsing/diagram/__init__.py b/src/pip/_vendor/pyparsing/diagram/__init__.py deleted file mode 100644 index 6074d2bfd0f..00000000000 --- a/src/pip/_vendor/pyparsing/diagram/__init__.py +++ /dev/null @@ -1,656 +0,0 @@ -# mypy: ignore-errors -import railroad -from pip._vendor import pyparsing -import typing -from typing import ( - List, - NamedTuple, - Generic, - TypeVar, - Dict, - Callable, - Set, - Iterable, -) -from jinja2 import Template -from io import StringIO -import inspect - - -jinja2_template_source = """\ -{% if not embed %} - - - -{% endif %} - {% if not head %} - - {% else %} - {{ head | safe }} - {% endif %} -{% if not embed %} - - -{% endif %} -{{ body | safe }} -{% for diagram in diagrams %} -
-

{{ diagram.title }}

-
{{ diagram.text }}
-
- {{ diagram.svg }} -
-
-{% endfor %} -{% if not embed %} - - -{% endif %} -""" - -template = Template(jinja2_template_source) - -# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet -NamedDiagram = NamedTuple( - "NamedDiagram", - [("name", str), ("diagram", typing.Optional[railroad.DiagramItem]), ("index", int)], -) -""" -A simple structure for associating a name with a railroad diagram -""" - -T = TypeVar("T") - - -class EachItem(railroad.Group): - """ - Custom railroad item to compose a: - - Group containing a - - OneOrMore containing a - - Choice of the elements in the Each - with the group label indicating that all must be matched - """ - - all_label = "[ALL]" - - def __init__(self, *items): - choice_item = railroad.Choice(len(items) - 1, *items) - one_or_more_item = railroad.OneOrMore(item=choice_item) - super().__init__(one_or_more_item, label=self.all_label) - - -class AnnotatedItem(railroad.Group): - """ - Simple subclass of Group that creates an annotation label - """ - - def __init__(self, label: str, item): - super().__init__(item=item, label="[{}]".format(label) if label else label) - - -class EditablePartial(Generic[T]): - """ - Acts like a functools.partial, but can be edited. In other words, it represents a type that hasn't yet been - constructed. - """ - - # We need this here because the railroad constructors actually transform the data, so can't be called until the - # entire tree is assembled - - def __init__(self, func: Callable[..., T], args: list, kwargs: dict): - self.func = func - self.args = args - self.kwargs = kwargs - - @classmethod - def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]": - """ - If you call this function in the same way that you would call the constructor, it will store the arguments - as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3) - """ - return EditablePartial(func=func, args=list(args), kwargs=kwargs) - - @property - def name(self): - return self.kwargs["name"] - - def __call__(self) -> T: - """ - Evaluate the partial and return the result - """ - args = self.args.copy() - kwargs = self.kwargs.copy() - - # This is a helpful hack to allow you to specify varargs parameters (e.g. *args) as keyword args (e.g. 
- # args=['list', 'of', 'things']) - arg_spec = inspect.getfullargspec(self.func) - if arg_spec.varargs in self.kwargs: - args += kwargs.pop(arg_spec.varargs) - - return self.func(*args, **kwargs) - - -def railroad_to_html(diagrams: List[NamedDiagram], embed=False, **kwargs) -> str: - """ - Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams - :params kwargs: kwargs to be passed in to the template - """ - data = [] - for diagram in diagrams: - if diagram.diagram is None: - continue - io = StringIO() - try: - css = kwargs.get('css') - diagram.diagram.writeStandalone(io.write, css=css) - except AttributeError: - diagram.diagram.writeSvg(io.write) - title = diagram.name - if diagram.index == 0: - title += " (root)" - data.append({"title": title, "text": "", "svg": io.getvalue()}) - - return template.render(diagrams=data, embed=embed, **kwargs) - - -def resolve_partial(partial: "EditablePartial[T]") -> T: - """ - Recursively resolves a collection of Partials into whatever type they are - """ - if isinstance(partial, EditablePartial): - partial.args = resolve_partial(partial.args) - partial.kwargs = resolve_partial(partial.kwargs) - return partial() - elif isinstance(partial, list): - return [resolve_partial(x) for x in partial] - elif isinstance(partial, dict): - return {key: resolve_partial(x) for key, x in partial.items()} - else: - return partial - - -def to_railroad( - element: pyparsing.ParserElement, - diagram_kwargs: typing.Optional[dict] = None, - vertical: int = 3, - show_results_names: bool = False, - show_groups: bool = False, -) -> List[NamedDiagram]: - """ - Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram - creation if you want to access the Railroad tree before it is converted to HTML - :param element: base element of the parser being diagrammed - :param diagram_kwargs: kwargs to pass to the Diagram() constructor - :param vertical: (optional) - int - limit at which number of alternatives should be - shown vertically instead of horizontally - :param show_results_names - bool to indicate whether results name annotations should be - included in the diagram - :param show_groups - bool to indicate whether groups should be highlighted with an unlabeled - surrounding box - """ - # Convert the whole tree underneath the root - lookup = ConverterState(diagram_kwargs=diagram_kwargs or {}) - _to_diagram_element( - element, - lookup=lookup, - parent=None, - vertical=vertical, - show_results_names=show_results_names, - show_groups=show_groups, - ) - - root_id = id(element) - # Convert the root if it hasn't been already - if root_id in lookup: - if not element.customName: - lookup[root_id].name = "" - lookup[root_id].mark_for_extraction(root_id, lookup, force=True) - - # Now that we're finished, we can convert from intermediate structures into Railroad elements - diags = list(lookup.diagrams.values()) - if len(diags) > 1: - # collapse out duplicate diags with the same name - seen = set() - deduped_diags = [] - for d in diags: - # don't extract SkipTo elements, they are uninformative as subdiagrams - if d.name == "...": - continue - if d.name is not None and d.name not in seen: - seen.add(d.name) - deduped_diags.append(d) - resolved = [resolve_partial(partial) for partial in deduped_diags] - else: - # special case - if just one diagram, always display it, even if - # it has no name - resolved = [resolve_partial(partial) for partial in diags] - return sorted(resolved, key=lambda diag: diag.index) - - 
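# [Editor's note] Illustrative sketch, not part of the removed vendored file.
# to_railroad()/railroad_to_html() back the public ParserElement.create_diagram()
# entry point, which writes a railroad diagram of a grammar to an HTML file
# (requires the railroad-diagrams and jinja2 packages). The output file name
# here is hypothetical. Assumes the standalone `pyparsing` distribution:
import pyparsing as pp

number = pp.Word(pp.nums).set_name("number")
expr = (number + pp.one_of("+ -") + number).set_name("expression")
expr.create_diagram("expression_diagram.html", show_results_names=True)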
-def _should_vertical( - specification: int, exprs: Iterable[pyparsing.ParserElement] -) -> bool: - """ - Returns true if we should return a vertical list of elements - """ - if specification is None: - return False - else: - return len(_visible_exprs(exprs)) >= specification - - -class ElementState: - """ - State recorded for an individual pyparsing Element - """ - - # Note: this should be a dataclass, but we have to support Python 3.5 - def __init__( - self, - element: pyparsing.ParserElement, - converted: EditablePartial, - parent: EditablePartial, - number: int, - name: str = None, - parent_index: typing.Optional[int] = None, - ): - #: The pyparsing element that this represents - self.element: pyparsing.ParserElement = element - #: The name of the element - self.name: typing.Optional[str] = name - #: The output Railroad element in an unconverted state - self.converted: EditablePartial = converted - #: The parent Railroad element, which we store so that we can extract this if it's duplicated - self.parent: EditablePartial = parent - #: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram - self.number: int = number - #: The index of this inside its parent - self.parent_index: typing.Optional[int] = parent_index - #: If true, we should extract this out into a subdiagram - self.extract: bool = False - #: If true, all of this element's children have been filled out - self.complete: bool = False - - def mark_for_extraction( - self, el_id: int, state: "ConverterState", name: str = None, force: bool = False - ): - """ - Called when this instance has been seen twice, and thus should eventually be extracted into a sub-diagram - :param el_id: id of the element - :param state: element/diagram state tracker - :param name: name to use for this element's text - :param force: If true, force extraction now, regardless of the state of this. Only useful for extracting the - root element when we know we're finished - """ - self.extract = True - - # Set the name - if not self.name: - if name: - # Allow forcing a custom name - self.name = name - elif self.element.customName: - self.name = self.element.customName - else: - self.name = "" - - # Just because this is marked for extraction doesn't mean we can do it yet. We may have to wait for children - # to be added - # Also, if this is just a string literal etc, don't bother extracting it - if force or (self.complete and _worth_extracting(self.element)): - state.extract_into_diagram(el_id) - - -class ConverterState: - """ - Stores some state that persists between recursions into the element tree - """ - - def __init__(self, diagram_kwargs: typing.Optional[dict] = None): - #: A dictionary mapping ParserElements to state relating to them - self._element_diagram_states: Dict[int, ElementState] = {} - #: A dictionary mapping ParserElement IDs to subdiagrams generated from them - self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {} - #: The index of the next unnamed element - self.unnamed_index: int = 1 - #: The index of the next element. 
This is used for sorting - self.index: int = 0 - #: Shared kwargs that are used to customize the construction of diagrams - self.diagram_kwargs: dict = diagram_kwargs or {} - self.extracted_diagram_names: Set[str] = set() - - def __setitem__(self, key: int, value: ElementState): - self._element_diagram_states[key] = value - - def __getitem__(self, key: int) -> ElementState: - return self._element_diagram_states[key] - - def __delitem__(self, key: int): - del self._element_diagram_states[key] - - def __contains__(self, key: int): - return key in self._element_diagram_states - - def generate_unnamed(self) -> int: - """ - Generate a number used in the name of an otherwise unnamed diagram - """ - self.unnamed_index += 1 - return self.unnamed_index - - def generate_index(self) -> int: - """ - Generate a number used to index a diagram - """ - self.index += 1 - return self.index - - def extract_into_diagram(self, el_id: int): - """ - Used when we encounter the same token twice in the same tree. When this - happens, we replace all instances of that token with a terminal, and - create a new subdiagram for the token - """ - position = self[el_id] - - # Replace the original definition of this element with a regular block - if position.parent: - ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name) - if "item" in position.parent.kwargs: - position.parent.kwargs["item"] = ret - elif "items" in position.parent.kwargs: - position.parent.kwargs["items"][position.parent_index] = ret - - # If the element we're extracting is a group, skip to its content but keep the title - if position.converted.func == railroad.Group: - content = position.converted.kwargs["item"] - else: - content = position.converted - - self.diagrams[el_id] = EditablePartial.from_call( - NamedDiagram, - name=position.name, - diagram=EditablePartial.from_call( - railroad.Diagram, content, **self.diagram_kwargs - ), - index=position.number, - ) - - del self[el_id] - - -def _worth_extracting(element: pyparsing.ParserElement) -> bool: - """ - Returns true if this element is worth having its own sub-diagram. 
Simply, if any of its children - themselves have children, then its complex enough to extract - """ - children = element.recurse() - return any(child.recurse() for child in children) - - -def _apply_diagram_item_enhancements(fn): - """ - decorator to ensure enhancements to a diagram item (such as results name annotations) - get applied on return from _to_diagram_element (we do this since there are several - returns in _to_diagram_element) - """ - - def _inner( - element: pyparsing.ParserElement, - parent: typing.Optional[EditablePartial], - lookup: ConverterState = None, - vertical: int = None, - index: int = 0, - name_hint: str = None, - show_results_names: bool = False, - show_groups: bool = False, - ) -> typing.Optional[EditablePartial]: - ret = fn( - element, - parent, - lookup, - vertical, - index, - name_hint, - show_results_names, - show_groups, - ) - - # apply annotation for results name, if present - if show_results_names and ret is not None: - element_results_name = element.resultsName - if element_results_name: - # add "*" to indicate if this is a "list all results" name - element_results_name += "" if element.modalResults else "*" - ret = EditablePartial.from_call( - railroad.Group, item=ret, label=element_results_name - ) - - return ret - - return _inner - - -def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]): - non_diagramming_exprs = ( - pyparsing.ParseElementEnhance, - pyparsing.PositionToken, - pyparsing.And._ErrorStop, - ) - return [ - e - for e in exprs - if not (e.customName or e.resultsName or isinstance(e, non_diagramming_exprs)) - ] - - -@_apply_diagram_item_enhancements -def _to_diagram_element( - element: pyparsing.ParserElement, - parent: typing.Optional[EditablePartial], - lookup: ConverterState = None, - vertical: int = None, - index: int = 0, - name_hint: str = None, - show_results_names: bool = False, - show_groups: bool = False, -) -> typing.Optional[EditablePartial]: - """ - Recursively converts a PyParsing Element to a railroad Element - :param lookup: The shared converter state that keeps track of useful things - :param index: The index of this element within the parent - :param parent: The parent of this element in the output tree - :param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default), - it sets the threshold of the number of items before we go vertical. 
If True, always go vertical, if False, never - do so - :param name_hint: If provided, this will override the generated name - :param show_results_names: bool flag indicating whether to add annotations for results names - :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed - :param show_groups: bool flag indicating whether to show groups using bounding box - """ - exprs = element.recurse() - name = name_hint or element.customName or type(element).__name__ - - # Python's id() is used to provide a unique identifier for elements - el_id = id(element) - - element_results_name = element.resultsName - - # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram - if not element.customName: - if isinstance( - element, - ( - # pyparsing.TokenConverter, - # pyparsing.Forward, - pyparsing.Located, - ), - ): - # However, if this element has a useful custom name, and its child does not, we can pass it on to the child - if exprs: - if not exprs[0].customName: - propagated_name = name - else: - propagated_name = None - - return _to_diagram_element( - element.expr, - parent=parent, - lookup=lookup, - vertical=vertical, - index=index, - name_hint=propagated_name, - show_results_names=show_results_names, - show_groups=show_groups, - ) - - # If the element isn't worth extracting, we always treat it as the first time we say it - if _worth_extracting(element): - if el_id in lookup: - # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate, - # so we have to extract it into a new diagram. - looked_up = lookup[el_id] - looked_up.mark_for_extraction(el_id, lookup, name=name_hint) - ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name) - return ret - - elif el_id in lookup.diagrams: - # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we - # just put in a marker element that refers to the sub-diagram - ret = EditablePartial.from_call( - railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"] - ) - return ret - - # Recursively convert child elements - # Here we find the most relevant Railroad element for matching pyparsing Element - # We use ``items=[]`` here to hold the place for where the child elements will go once created - if isinstance(element, pyparsing.And): - # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat - # (all will have the same name, and resultsName) - if not exprs: - return None - if len(set((e.name, e.resultsName) for e in exprs)) == 1: - ret = EditablePartial.from_call( - railroad.OneOrMore, item="", repeat=str(len(exprs)) - ) - elif _should_vertical(vertical, exprs): - ret = EditablePartial.from_call(railroad.Stack, items=[]) - else: - ret = EditablePartial.from_call(railroad.Sequence, items=[]) - elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)): - if not exprs: - return None - if _should_vertical(vertical, exprs): - ret = EditablePartial.from_call(railroad.Choice, 0, items=[]) - else: - ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[]) - elif isinstance(element, pyparsing.Each): - if not exprs: - return None - ret = EditablePartial.from_call(EachItem, items=[]) - elif isinstance(element, pyparsing.NotAny): - ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="") - elif isinstance(element, pyparsing.FollowedBy): - ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", 
item="") - elif isinstance(element, pyparsing.PrecededBy): - ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="") - elif isinstance(element, pyparsing.Group): - if show_groups: - ret = EditablePartial.from_call(AnnotatedItem, label="", item="") - else: - ret = EditablePartial.from_call(railroad.Group, label="", item="") - elif isinstance(element, pyparsing.TokenConverter): - label = type(element).__name__.lower() - if label == "tokenconverter": - ret = EditablePartial.from_call(railroad.Sequence, items=[]) - else: - ret = EditablePartial.from_call(AnnotatedItem, label=label, item="") - elif isinstance(element, pyparsing.Opt): - ret = EditablePartial.from_call(railroad.Optional, item="") - elif isinstance(element, pyparsing.OneOrMore): - ret = EditablePartial.from_call(railroad.OneOrMore, item="") - elif isinstance(element, pyparsing.ZeroOrMore): - ret = EditablePartial.from_call(railroad.ZeroOrMore, item="") - elif isinstance(element, pyparsing.Group): - ret = EditablePartial.from_call( - railroad.Group, item=None, label=element_results_name - ) - elif isinstance(element, pyparsing.Empty) and not element.customName: - # Skip unnamed "Empty" elements - ret = None - elif isinstance(element, pyparsing.ParseElementEnhance): - ret = EditablePartial.from_call(railroad.Sequence, items=[]) - elif len(exprs) > 0 and not element_results_name: - ret = EditablePartial.from_call(railroad.Group, item="", label=name) - elif len(exprs) > 0: - ret = EditablePartial.from_call(railroad.Sequence, items=[]) - else: - terminal = EditablePartial.from_call(railroad.Terminal, element.defaultName) - ret = terminal - - if ret is None: - return - - # Indicate this element's position in the tree so we can extract it if necessary - lookup[el_id] = ElementState( - element=element, - converted=ret, - parent=parent, - parent_index=index, - number=lookup.generate_index(), - ) - if element.customName: - lookup[el_id].mark_for_extraction(el_id, lookup, element.customName) - - i = 0 - for expr in exprs: - # Add a placeholder index in case we have to extract the child before we even add it to the parent - if "items" in ret.kwargs: - ret.kwargs["items"].insert(i, None) - - item = _to_diagram_element( - expr, - parent=ret, - lookup=lookup, - vertical=vertical, - index=i, - show_results_names=show_results_names, - show_groups=show_groups, - ) - - # Some elements don't need to be shown in the diagram - if item is not None: - if "item" in ret.kwargs: - ret.kwargs["item"] = item - elif "items" in ret.kwargs: - # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal - ret.kwargs["items"][i] = item - i += 1 - elif "items" in ret.kwargs: - # If we're supposed to skip this element, remove it from the parent - del ret.kwargs["items"][i] - - # If all this items children are none, skip this item - if ret and ( - ("items" in ret.kwargs and len(ret.kwargs["items"]) == 0) - or ("item" in ret.kwargs and ret.kwargs["item"] is None) - ): - ret = EditablePartial.from_call(railroad.Terminal, name) - - # Mark this element as "complete", ie it has all of its children - if el_id in lookup: - lookup[el_id].complete = True - - if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete: - lookup.extract_into_diagram(el_id) - if ret is not None: - ret = EditablePartial.from_call( - railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"] - ) - - return ret diff --git a/src/pip/_vendor/pyparsing/exceptions.py b/src/pip/_vendor/pyparsing/exceptions.py deleted file mode 
100644 index 1aaea56f54d..00000000000 --- a/src/pip/_vendor/pyparsing/exceptions.py +++ /dev/null @@ -1,300 +0,0 @@ -# exceptions.py - -import re -import sys -import typing - -from .util import ( - col, - line, - lineno, - _collapse_string_to_ranges, - replaced_by_pep8, -) -from .unicode import pyparsing_unicode as ppu - - -class _ExceptionWordUnicodeSet( - ppu.Latin1, ppu.LatinA, ppu.LatinB, ppu.Greek, ppu.Cyrillic -): - pass - - -_extract_alphanums = _collapse_string_to_ranges(_ExceptionWordUnicodeSet.alphanums) -_exception_word_extractor = re.compile("([" + _extract_alphanums + "]{1,16})|.") - - -class ParseBaseException(Exception): - """base exception class for all parsing runtime exceptions""" - - loc: int - msg: str - pstr: str - parser_element: typing.Any # "ParserElement" - args: typing.Tuple[str, int, typing.Optional[str]] - - __slots__ = ( - "loc", - "msg", - "pstr", - "parser_element", - "args", - ) - - # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible - def __init__( - self, - pstr: str, - loc: int = 0, - msg: typing.Optional[str] = None, - elem=None, - ): - self.loc = loc - if msg is None: - self.msg = pstr - self.pstr = "" - else: - self.msg = msg - self.pstr = pstr - self.parser_element = elem - self.args = (pstr, loc, msg) - - @staticmethod - def explain_exception(exc, depth=16): - """ - Method to take an exception and translate the Python internal traceback into a list - of the pyparsing expressions that caused the exception to be raised. - - Parameters: - - - exc - exception raised during parsing (need not be a ParseException, in support - of Python exceptions that might be raised in a parse action) - - depth (default=16) - number of levels back in the stack trace to list expression - and function names; if None, the full stack trace names will be listed; if 0, only - the failing input line, marker, and exception string will be shown - - Returns a multi-line string listing the ParserElements and/or function names in the - exception's stack trace. - """ - import inspect - from .core import ParserElement - - if depth is None: - depth = sys.getrecursionlimit() - ret = [] - if isinstance(exc, ParseBaseException): - ret.append(exc.line) - ret.append(" " * (exc.column - 1) + "^") - ret.append(f"{type(exc).__name__}: {exc}") - - if depth <= 0: - return "\n".join(ret) - - callers = inspect.getinnerframes(exc.__traceback__, context=depth) - seen = set() - for ff in callers[-depth:]: - frm = ff[0] - - f_self = frm.f_locals.get("self", None) - if isinstance(f_self, ParserElement): - if not frm.f_code.co_name.startswith(("parseImpl", "_parseNoCache")): - continue - if id(f_self) in seen: - continue - seen.add(id(f_self)) - - self_type = type(f_self) - ret.append(f"{self_type.__module__}.{self_type.__name__} - {f_self}") - - elif f_self is not None: - self_type = type(f_self) - ret.append(f"{self_type.__module__}.{self_type.__name__}") - - else: - code = frm.f_code - if code.co_name in ("wrapper", "<module>"): - continue - - ret.append(code.co_name) - - depth -= 1 - if not depth: - break - - return "\n".join(ret) - - @classmethod - def _from_exception(cls, pe): - """ - internal factory method to simplify creating one type of ParseException - from another - avoids having __init__ signature conflicts among subclasses - """ - return cls(pe.pstr, pe.loc, pe.msg, pe.parser_element) - - @property - def line(self) -> str: - """ - Return the line of text where the exception occurred. 
- """ - return line(self.loc, self.pstr) - - @property - def lineno(self) -> int: - """ - Return the 1-based line number of text where the exception occurred. - """ - return lineno(self.loc, self.pstr) - - @property - def col(self) -> int: - """ - Return the 1-based column on the line of text where the exception occurred. - """ - return col(self.loc, self.pstr) - - @property - def column(self) -> int: - """ - Return the 1-based column on the line of text where the exception occurred. - """ - return col(self.loc, self.pstr) - - # pre-PEP8 compatibility - @property - def parserElement(self): - return self.parser_element - - @parserElement.setter - def parserElement(self, elem): - self.parser_element = elem - - def __str__(self) -> str: - if self.pstr: - if self.loc >= len(self.pstr): - foundstr = ", found end of text" - else: - # pull out next word at error location - found_match = _exception_word_extractor.match(self.pstr, self.loc) - if found_match is not None: - found = found_match.group(0) - else: - found = self.pstr[self.loc : self.loc + 1] - foundstr = (", found %r" % found).replace(r"\\", "\\") - else: - foundstr = "" - return f"{self.msg}{foundstr} (at char {self.loc}), (line:{self.lineno}, col:{self.column})" - - def __repr__(self): - return str(self) - - def mark_input_line( - self, marker_string: typing.Optional[str] = None, *, markerString: str = ">!<" - ) -> str: - """ - Extracts the exception line from the input string, and marks - the location of the exception with a special symbol. - """ - markerString = marker_string if marker_string is not None else markerString - line_str = self.line - line_column = self.column - 1 - if markerString: - line_str = "".join( - (line_str[:line_column], markerString, line_str[line_column:]) - ) - return line_str.strip() - - def explain(self, depth=16) -> str: - """ - Method to translate the Python internal traceback into a list - of the pyparsing expressions that caused the exception to be raised. - - Parameters: - - - depth (default=16) - number of levels back in the stack trace to list expression - and function names; if None, the full stack trace names will be listed; if 0, only - the failing input line, marker, and exception string will be shown - - Returns a multi-line string listing the ParserElements and/or function names in the - exception's stack trace. - - Example:: - - # an expression to parse 3 integers - expr = pp.Word(pp.nums) * 3 - try: - # a failing parse - the third integer is prefixed with "A" - expr.parse_string("123 456 A789") - except pp.ParseException as pe: - print(pe.explain(depth=0)) - - prints:: - - 123 456 A789 - ^ - ParseException: Expected W:(0-9), found 'A' (at char 8), (line:1, col:9) - - Note: the diagnostic output will include string representations of the expressions - that failed to parse. These representations will be more helpful if you use `set_name` to - give identifiable names to your expressions. Otherwise they will use the default string - forms, which may be cryptic to read. - - Note: pyparsing's default truncation of exception tracebacks may also truncate the - stack of expressions that are displayed in the ``explain`` output. 
To get the full listing - of parser expressions, you may have to set ``ParserElement.verbose_stacktrace = True`` - """ - return self.explain_exception(self, depth) - - # fmt: off - markInputline = replaced_by_pep8("markInputline", mark_input_line) - # fmt: on - - -class ParseException(ParseBaseException): - """ - Exception thrown when a parse expression doesn't match the input string - - Example:: - - integer = Word(nums).set_name("integer") - try: - integer.parse_string("ABC") - except ParseException as pe: - print(pe) - print(f"column: {pe.column}") - - prints:: - - Expected integer (at char 0), (line:1, col:1) column: 1 - - """ - - -class ParseFatalException(ParseBaseException): - """ - User-throwable exception thrown when inconsistent parse content - is found; stops all parsing immediately - """ - - -class ParseSyntaxException(ParseFatalException): - """ - Just like :class:`ParseFatalException`, but thrown internally - when an :class:`ErrorStop` ('-' operator) indicates - that parsing is to stop immediately because an unbacktrackable - syntax error has been found. - """ - - -class RecursiveGrammarException(Exception): - """ - Exception thrown by :class:`ParserElement.validate` if the - grammar could be left-recursive; parser may need to enable - left recursion using :class:`ParserElement.enable_left_recursion` - """ - - def __init__(self, parseElementList): - self.parseElementTrace = parseElementList - - def __str__(self) -> str: - return f"RecursiveGrammarException: {self.parseElementTrace}" diff --git a/src/pip/_vendor/pyparsing/helpers.py b/src/pip/_vendor/pyparsing/helpers.py deleted file mode 100644 index dcfdb8fe4bf..00000000000 --- a/src/pip/_vendor/pyparsing/helpers.py +++ /dev/null @@ -1,1078 +0,0 @@ -# helpers.py -import html.entities -import re -import sys -import typing - -from . import __diag__ -from .core import * -from .util import ( - _bslash, - _flatten, - _escape_regex_range_chars, - replaced_by_pep8, -) - - -# -# global helpers -# -def counted_array( - expr: ParserElement, - int_expr: typing.Optional[ParserElement] = None, - *, - intExpr: typing.Optional[ParserElement] = None, -) -> ParserElement: - """Helper to define a counted list of expressions. - - This helper defines a pattern of the form:: - - integer expr expr expr... - - where the leading integer tells how many expr expressions follow. - The matched tokens returns the array of expr tokens as a list - the - leading count token is suppressed. - - If ``int_expr`` is specified, it should be a pyparsing expression - that produces an integer value. 
- - Example:: - - counted_array(Word(alphas)).parse_string('2 ab cd ef') # -> ['ab', 'cd'] - - # in this parser, the leading integer value is given in binary, - # '10' indicating that 2 values are in the array - binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2)) - counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef') # -> ['ab', 'cd'] - - # if other fields must be parsed after the count but before the - # list items, give the fields results names and they will - # be preserved in the returned ParseResults: - count_with_metadata = integer + Word(alphas)("type") - typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items") - result = typed_array.parse_string("3 bool True True False") - print(result.dump()) - - # prints - # ['True', 'True', 'False'] - # - items: ['True', 'True', 'False'] - # - type: 'bool' - """ - intExpr = intExpr or int_expr - array_expr = Forward() - - def count_field_parse_action(s, l, t): - nonlocal array_expr - n = t[0] - array_expr <<= (expr * n) if n else Empty() - # clear list contents, but keep any named results - del t[:] - - if intExpr is None: - intExpr = Word(nums).set_parse_action(lambda t: int(t[0])) - else: - intExpr = intExpr.copy() - intExpr.set_name("arrayLen") - intExpr.add_parse_action(count_field_parse_action, call_during_try=True) - return (intExpr + array_expr).set_name(f"(len) {expr}...") - - -def match_previous_literal(expr: ParserElement) -> ParserElement: - """Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks for - a 'repeat' of a previous expression. For example:: - - first = Word(nums) - second = match_previous_literal(first) - match_expr = first + ":" + second - - will match ``"1:1"``, but not ``"1:2"``. Because this - matches a previous literal, will also match the leading - ``"1:1"`` in ``"1:10"``. If this is not desired, use - :class:`match_previous_expr`. Do *not* use with packrat parsing - enabled. - """ - rep = Forward() - - def copy_token_to_repeater(s, l, t): - if not t: - rep << Empty() - return - - if len(t) == 1: - rep << t[0] - return - - # flatten t tokens - tflat = _flatten(t.as_list()) - rep << And(Literal(tt) for tt in tflat) - - expr.add_parse_action(copy_token_to_repeater, callDuringTry=True) - rep.set_name("(prev) " + str(expr)) - return rep - - -def match_previous_expr(expr: ParserElement) -> ParserElement: - """Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks for - a 'repeat' of a previous expression. For example:: - - first = Word(nums) - second = match_previous_expr(first) - match_expr = first + ":" + second - - will match ``"1:1"``, but not ``"1:2"``. Because this - matches by expressions, will *not* match the leading ``"1:1"`` - in ``"1:10"``; the expressions are evaluated first, and then - compared, so ``"1"`` is compared with ``"10"``. Do *not* use - with packrat parsing enabled. 
- """ - rep = Forward() - e2 = expr.copy() - rep <<= e2 - - def copy_token_to_repeater(s, l, t): - matchTokens = _flatten(t.as_list()) - - def must_match_these_tokens(s, l, t): - theseTokens = _flatten(t.as_list()) - if theseTokens != matchTokens: - raise ParseException( - s, l, f"Expected {matchTokens}, found{theseTokens}" - ) - - rep.set_parse_action(must_match_these_tokens, callDuringTry=True) - - expr.add_parse_action(copy_token_to_repeater, callDuringTry=True) - rep.set_name("(prev) " + str(expr)) - return rep - - -def one_of( - strs: Union[typing.Iterable[str], str], - caseless: bool = False, - use_regex: bool = True, - as_keyword: bool = False, - *, - useRegex: bool = True, - asKeyword: bool = False, -) -> ParserElement: - """Helper to quickly define a set of alternative :class:`Literal` s, - and makes sure to do longest-first testing when there is a conflict, - regardless of the input order, but returns - a :class:`MatchFirst` for best performance. - - Parameters: - - - ``strs`` - a string of space-delimited literals, or a collection of - string literals - - ``caseless`` - treat all literals as caseless - (default= ``False``) - - ``use_regex`` - as an optimization, will - generate a :class:`Regex` object; otherwise, will generate - a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if - creating a :class:`Regex` raises an exception) - (default= ``True``) - - ``as_keyword`` - enforce :class:`Keyword`-style matching on the - generated expressions - (default= ``False``) - - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility, - but will be removed in a future release - - Example:: - - comp_oper = one_of("< = > <= >= !=") - var = Word(alphas) - number = Word(nums) - term = var | number - comparison_expr = term + comp_oper + term - print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12")) - - prints:: - - [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] - """ - asKeyword = asKeyword or as_keyword - useRegex = useRegex and use_regex - - if ( - isinstance(caseless, str_type) - and __diag__.warn_on_multiple_string_args_to_oneof - ): - warnings.warn( - "More than one string argument passed to one_of, pass" - " choices as a list or space-delimited string", - stacklevel=2, - ) - - if caseless: - isequal = lambda a, b: a.upper() == b.upper() - masks = lambda a, b: b.upper().startswith(a.upper()) - parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral - else: - isequal = lambda a, b: a == b - masks = lambda a, b: b.startswith(a) - parseElementClass = Keyword if asKeyword else Literal - - symbols: List[str] = [] - if isinstance(strs, str_type): - strs = typing.cast(str, strs) - symbols = strs.split() - elif isinstance(strs, Iterable): - symbols = list(strs) - else: - raise TypeError("Invalid argument to one_of, expected string or iterable") - if not symbols: - return NoMatch() - - # reorder given symbols to take care to avoid masking longer choices with shorter ones - # (but only if the given symbols are not just single characters) - if any(len(sym) > 1 for sym in symbols): - i = 0 - while i < len(symbols) - 1: - cur = symbols[i] - for j, other in enumerate(symbols[i + 1 :]): - if isequal(other, cur): - del symbols[i + j + 1] - break - if masks(cur, other): - del symbols[i + j + 1] - symbols.insert(i, other) - break - else: - i += 1 - - if useRegex: - re_flags: int = re.IGNORECASE if caseless else 0 - - try: - if all(len(sym) == 1 for sym in symbols): - # symbols are just single characters, create 
range regex pattern - patt = f"[{''.join(_escape_regex_range_chars(sym) for sym in symbols)}]" - else: - patt = "|".join(re.escape(sym) for sym in symbols) - - # wrap with \b word break markers if defining as keywords - if asKeyword: - patt = rf"\b(?:{patt})\b" - - ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols)) - - if caseless: - # add parse action to return symbols as specified, not in random - # casing as found in input string - symbol_map = {sym.lower(): sym for sym in symbols} - ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()]) - - return ret - - except re.error: - warnings.warn( - "Exception creating Regex for one_of, building MatchFirst", stacklevel=2 - ) - - # last resort, just use MatchFirst - return MatchFirst(parseElementClass(sym) for sym in symbols).set_name( - " | ".join(symbols) - ) - - -def dict_of(key: ParserElement, value: ParserElement) -> ParserElement: - """Helper to easily and clearly define a dictionary by specifying - the respective patterns for the key and value. Takes care of - defining the :class:`Dict`, :class:`ZeroOrMore`, and - :class:`Group` tokens in the proper order. The key pattern - can include delimiting markers or punctuation, as long as they are - suppressed, thereby leaving the significant key text. The value - pattern can include named results, so that the :class:`Dict` results - can include named token fields. - - Example:: - - text = "shape: SQUARE posn: upper left color: light blue texture: burlap" - attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) - print(attr_expr[1, ...].parse_string(text).dump()) - - attr_label = label - attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join) - - # similar to Dict, but simpler call format - result = dict_of(attr_label, attr_value).parse_string(text) - print(result.dump()) - print(result['shape']) - print(result.shape) # object attribute access works too - print(result.as_dict()) - - prints:: - - [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] - - color: 'light blue' - - posn: 'upper left' - - shape: 'SQUARE' - - texture: 'burlap' - SQUARE - SQUARE - {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} - """ - return Dict(OneOrMore(Group(key + value))) - - -def original_text_for( - expr: ParserElement, as_string: bool = True, *, asString: bool = True -) -> ParserElement: - """Helper to return the original, untokenized text for a given - expression. Useful to restore the parsed fields of an HTML start - tag into the raw tag text itself, or to revert separate tokens with - intervening whitespace back to the original matching input text. By - default, returns a string containing the original parsed text. - - If the optional ``as_string`` argument is passed as - ``False``, then the return value is - a :class:`ParseResults` containing any results names that - were originally matched, and a single token containing the original - matched text from the input string. So if the expression passed to - :class:`original_text_for` contains expressions with defined - results names, you must set ``as_string`` to ``False`` if you - want to preserve those results name values. - - The ``asString`` pre-PEP8 argument is retained for compatibility, - but will be removed in a future release. 
- - Example:: - - src = "this is test <b> bold <i>text</i> </b> normal text " - for tag in ("b", "i"): - opener, closer = make_html_tags(tag) - patt = original_text_for(opener + ... + closer) - print(patt.search_string(src)[0]) - - prints:: - - ['<b> bold <i>text</i> </b>'] - ['<i>text</i>'] - """ - asString = asString and as_string - - locMarker = Empty().set_parse_action(lambda s, loc, t: loc) - endlocMarker = locMarker.copy() - endlocMarker.callPreparse = False - matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") - if asString: - extractText = lambda s, l, t: s[t._original_start : t._original_end] - else: - - def extractText(s, l, t): - t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]] - - matchExpr.set_parse_action(extractText) - matchExpr.ignoreExprs = expr.ignoreExprs - matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection) - return matchExpr - - -def ungroup(expr: ParserElement) -> ParserElement: - """Helper to undo pyparsing's default grouping of And expressions, - even if all but one are non-empty. - """ - return TokenConverter(expr).add_parse_action(lambda t: t[0]) - - -def locatedExpr(expr: ParserElement) -> ParserElement: - """ - (DEPRECATED - future code should use the :class:`Located` class) - Helper to decorate a returned token with its starting and ending - locations in the input string. - - This helper adds the following results names: - - - ``locn_start`` - location where matched expression begins - - ``locn_end`` - location where matched expression ends - - ``value`` - the actual parsed results - - Be careful if the input text contains ``<TAB>`` characters, you - may want to call :class:`ParserElement.parse_with_tabs` - - Example:: - - wd = Word(alphas) - for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"): - print(match) - - prints:: - - [[0, 'ljsdf', 5]] - [[8, 'lksdjjf', 15]] - [[18, 'lkkjj', 23]] - """ - locator = Empty().set_parse_action(lambda ss, ll, tt: ll) - return Group( - locator("locn_start") - + expr("value") - + locator.copy().leaveWhitespace()("locn_end") - ) - - -def nested_expr( - opener: Union[str, ParserElement] = "(", - closer: Union[str, ParserElement] = ")", - content: typing.Optional[ParserElement] = None, - ignore_expr: ParserElement = quoted_string(), - *, - ignoreExpr: ParserElement = quoted_string(), -) -> ParserElement: - """Helper method for defining nested lists enclosed in opening and - closing delimiters (``"("`` and ``")"`` are the default). - - Parameters: - - - ``opener`` - opening character for a nested list - (default= ``"("``); can also be a pyparsing expression - - ``closer`` - closing character for a nested list - (default= ``")"``); can also be a pyparsing expression - - ``content`` - expression for items within the nested lists - (default= ``None``) - - ``ignore_expr`` - expression for ignoring opening and closing delimiters - (default= :class:`quoted_string`) - - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility - but will be removed in a future release - - If an expression is not provided for the content argument, the - nested expression will capture all whitespace-delimited content - between delimiters as a list of separate values. - - Use the ``ignore_expr`` argument to define expressions that may - contain opening or closing characters that should not be treated as - opening or closing characters for nesting, such as quoted_string or - a comment expression. Specify multiple expressions using an - :class:`Or` or :class:`MatchFirst`. 
The default is - :class:`quoted_string`, but if no expressions are to be ignored, then - pass ``None`` for this argument. - - Example:: - - data_type = one_of("void int short long char float double") - decl_data_type = Combine(data_type + Opt(Word('*'))) - ident = Word(alphas+'_', alphanums+'_') - number = pyparsing_common.number - arg = Group(decl_data_type + ident) - LPAR, RPAR = map(Suppress, "()") - - code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment)) - - c_function = (decl_data_type("type") - + ident("name") - + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR - + code_body("body")) - c_function.ignore(c_style_comment) - - source_code = ''' - int is_odd(int x) { - return (x%2); - } - - int dec_to_hex(char hchar) { - if (hchar >= '0' && hchar <= '9') { - return (ord(hchar)-ord('0')); - } else { - return (10+ord(hchar)-ord('A')); - } - } - ''' - for func in c_function.search_string(source_code): - print("%(name)s (%(type)s) args: %(args)s" % func) - - - prints:: - - is_odd (int) args: [['int', 'x']] - dec_to_hex (int) args: [['char', 'hchar']] - """ - if ignoreExpr != ignore_expr: - ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr - if opener == closer: - raise ValueError("opening and closing strings cannot be the same") - if content is None: - if isinstance(opener, str_type) and isinstance(closer, str_type): - opener = typing.cast(str, opener) - closer = typing.cast(str, closer) - if len(opener) == 1 and len(closer) == 1: - if ignoreExpr is not None: - content = Combine( - OneOrMore( - ~ignoreExpr - + CharsNotIn( - opener + closer + ParserElement.DEFAULT_WHITE_CHARS, - exact=1, - ) - ) - ).set_parse_action(lambda t: t[0].strip()) - else: - content = empty.copy() + CharsNotIn( - opener + closer + ParserElement.DEFAULT_WHITE_CHARS - ).set_parse_action(lambda t: t[0].strip()) - else: - if ignoreExpr is not None: - content = Combine( - OneOrMore( - ~ignoreExpr - + ~Literal(opener) - + ~Literal(closer) - + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) - ) - ).set_parse_action(lambda t: t[0].strip()) - else: - content = Combine( - OneOrMore( - ~Literal(opener) - + ~Literal(closer) - + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) - ) - ).set_parse_action(lambda t: t[0].strip()) - else: - raise ValueError( - "opening and closing arguments must be strings if no content expression is given" - ) - ret = Forward() - if ignoreExpr is not None: - ret <<= Group( - Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer) - ) - else: - ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)) - ret.set_name(f"nested {opener}{closer} expression") - # don't override error message from content expressions - ret.errmsg = None - return ret - - -def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")): - """Internal helper to construct opening and closing tag expressions, given a tag name""" - if isinstance(tagStr, str_type): - resname = tagStr - tagStr = Keyword(tagStr, caseless=not xml) - else: - resname = tagStr.name - - tagAttrName = Word(alphas, alphanums + "_-:") - if xml: - tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes) - openTag = ( - suppress_LT - + tagStr("tag") - + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue))) - + Opt("/", default=[False])("empty").set_parse_action( - lambda s, l, t: t[0] == "/" - ) - + suppress_GT - ) - else: - tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word( - 
printables, exclude_chars=">" - ) - openTag = ( - suppress_LT - + tagStr("tag") - + Dict( - ZeroOrMore( - Group( - tagAttrName.set_parse_action(lambda t: t[0].lower()) - + Opt(Suppress("=") + tagAttrValue) - ) - ) - ) - + Opt("/", default=[False])("empty").set_parse_action( - lambda s, l, t: t[0] == "/" - ) - + suppress_GT - ) - closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False) - - openTag.set_name(f"<{resname}>") - # add start results name in parse action now that ungrouped names are not reported at two levels - openTag.add_parse_action( - lambda t: t.__setitem__( - "start" + "".join(resname.replace(":", " ").title().split()), t.copy() - ) - ) - closeTag = closeTag( - "end" + "".join(resname.replace(":", " ").title().split()) - ).set_name(f"</{resname}>") - openTag.tag = resname - closeTag.tag = resname - openTag.tag_body = SkipTo(closeTag()) - return openTag, closeTag - - -def make_html_tags( - tag_str: Union[str, ParserElement] -) -> Tuple[ParserElement, ParserElement]: - """Helper to construct opening and closing tag expressions for HTML, - given a tag name. Matches tags in either upper or lower case, - attributes with namespaces and with quoted or unquoted values. - - Example:: - - text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>' - # make_html_tags returns pyparsing expressions for the opening and - # closing tags as a 2-tuple - a, a_end = make_html_tags("A") - link_expr = a + SkipTo(a_end)("link_text") + a_end - - for link in link_expr.search_string(text): - # attributes in the <A> tag (like "href" shown here) are - # also accessible as named results - print(link.link_text, '->', link.href) - - prints:: - - pyparsing -> https://github.com/pyparsing/pyparsing/wiki - """ - return _makeTags(tag_str, False) - - -def make_xml_tags( - tag_str: Union[str, ParserElement] -) -> Tuple[ParserElement, ParserElement]: - """Helper to construct opening and closing tag expressions for XML, - given a tag name. Matches tags only in the given upper/lower case. - - Example: similar to :class:`make_html_tags` - """ - return _makeTags(tag_str, True) - - -any_open_tag: ParserElement -any_close_tag: ParserElement -any_open_tag, any_close_tag = make_html_tags( - Word(alphas, alphanums + "_:").set_name("any tag") -) - -_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()} -common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name( - "common HTML entity" -) - - -def replace_html_entity(s, l, t): - """Helper parser action to replace common HTML entities with their special characters""" - return _htmlEntityMap.get(t.entity) - -
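``common_html_entity`` and ``replace_html_entity`` are meant to be paired; a minimal usage sketch (assuming a standalone pyparsing 3.x install rather than pip's vendored copy; the input text is illustrative)::

    import pyparsing as pp

    text = "AT&amp;T is &gt; Sprint"
    # copy() so the shared module-level expression is not mutated
    entity = pp.common_html_entity.copy().set_parse_action(pp.replace_html_entity)
    print(entity.transform_string(text))  # -> AT&T is > Sprint

-class OpAssoc(Enum): - """Enumeration of operator associativity - - used in constructing InfixNotationOperatorSpec for :class:`infix_notation`""" - - LEFT = 1 - RIGHT = 2 - - -InfixNotationOperatorArgType = Union[ - ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]] -] -InfixNotationOperatorSpec = Union[ - Tuple[ - InfixNotationOperatorArgType, - int, - OpAssoc, - typing.Optional[ParseAction], - ], - Tuple[ - InfixNotationOperatorArgType, - int, - OpAssoc, - ], -] - - -def infix_notation( - base_expr: ParserElement, - op_list: List[InfixNotationOperatorSpec], - lpar: Union[str, ParserElement] = Suppress("("), - rpar: Union[str, ParserElement] = Suppress(")"), -) -> ParserElement: - """Helper method for constructing grammars of expressions made up of - operators working in a precedence hierarchy. Operators may be unary - or binary, left- or right-associative. Parse actions can also be - attached to operator expressions. 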
The generated parser will also - recognize the use of parentheses to override operator precedences - (see example below). - - Note: if you define a deep operator list, you may see performance - issues when using infix_notation. See - :class:`ParserElement.enable_packrat` for a mechanism to potentially - improve your parser performance. - - Parameters: - - - ``base_expr`` - expression representing the most basic operand to - be used in the expression - - ``op_list`` - list of tuples, one for each operator precedence level - in the expression grammar; each tuple is of the form ``(op_expr, - num_operands, right_left_assoc, (optional)parse_action)``, where: - - - ``op_expr`` is the pyparsing expression for the operator; may also - be a string, which will be converted to a Literal; if ``num_operands`` - is 3, ``op_expr`` is a tuple of two expressions, for the two - operators separating the 3 terms - - ``num_operands`` is the number of terms for this operator (must be 1, - 2, or 3) - - ``right_left_assoc`` is the indicator whether the operator is right - or left associative, using the pyparsing-defined constants - ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``. - - ``parse_action`` is the parse action to be associated with - expressions matching this operator expression (the parse action - tuple member may be omitted); if the parse action is passed - a tuple or list of functions, this is equivalent to calling - ``set_parse_action(*fn)`` - (:class:`ParserElement.set_parse_action`) - - ``lpar`` - expression for matching left-parentheses; if passed as a - str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as - an expression (such as ``Literal('(')``), then it will be kept in - the parsed results, and grouped with them. (default= ``Suppress('(')``) - - ``rpar`` - expression for matching right-parentheses; if passed as a - str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as - an expression (such as ``Literal(')')``), then it will be kept in - the parsed results, and grouped with them. 
(default= ``Suppress(')')``) - - Example:: - - # simple example of four-function arithmetic with ints and - # variable names - integer = pyparsing_common.signed_integer - varname = pyparsing_common.identifier - - arith_expr = infix_notation(integer | varname, - [ - ('-', 1, OpAssoc.RIGHT), - (one_of('* /'), 2, OpAssoc.LEFT), - (one_of('+ -'), 2, OpAssoc.LEFT), - ]) - - arith_expr.run_tests(''' - 5+3*6 - (5+3)*6 - -2--11 - ''', full_dump=False) - - prints:: - - 5+3*6 - [[5, '+', [3, '*', 6]]] - - (5+3)*6 - [[[5, '+', 3], '*', 6]] - - (5+x)*y - [[[5, '+', 'x'], '*', 'y']] - - -2--11 - [[['-', 2], '-', ['-', 11]]] - """ - - # captive version of FollowedBy that does not do parse actions or capture results names - class _FB(FollowedBy): - def parseImpl(self, instring, loc, doActions=True): - self.expr.try_parse(instring, loc) - return loc, [] - - _FB.__name__ = "FollowedBy>" - - ret = Forward() - if isinstance(lpar, str): - lpar = Suppress(lpar) - if isinstance(rpar, str): - rpar = Suppress(rpar) - - # if lpar and rpar are not suppressed, wrap in group - if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)): - lastExpr = base_expr | Group(lpar + ret + rpar) - else: - lastExpr = base_expr | (lpar + ret + rpar) - - arity: int - rightLeftAssoc: opAssoc - pa: typing.Optional[ParseAction] - opExpr1: ParserElement - opExpr2: ParserElement - for operDef in op_list: - opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4] # type: ignore[assignment] - if isinstance(opExpr, str_type): - opExpr = ParserElement._literalStringClass(opExpr) - opExpr = typing.cast(ParserElement, opExpr) - if arity == 3: - if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2: - raise ValueError( - "if numterms=3, opExpr must be a tuple or list of two expressions" - ) - opExpr1, opExpr2 = opExpr - term_name = f"{opExpr1}{opExpr2} term" - else: - term_name = f"{opExpr} term" - - if not 1 <= arity <= 3: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - - if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT): - raise ValueError("operator must indicate right or left associativity") - - thisExpr: ParserElement = Forward().set_name(term_name) - thisExpr = typing.cast(Forward, thisExpr) - if rightLeftAssoc is OpAssoc.LEFT: - if arity == 1: - matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...]) - elif arity == 2: - if opExpr is not None: - matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( - lastExpr + (opExpr + lastExpr)[1, ...] - ) - else: - matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...]) - elif arity == 3: - matchExpr = _FB( - lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr - ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr)) - elif rightLeftAssoc is OpAssoc.RIGHT: - if arity == 1: - # try to avoid LR with this extra test - if not isinstance(opExpr, Opt): - opExpr = Opt(opExpr) - matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr) - elif arity == 2: - if opExpr is not None: - matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group( - lastExpr + (opExpr + thisExpr)[1, ...] - ) - else: - matchExpr = _FB(lastExpr + thisExpr) + Group( - lastExpr + thisExpr[1, ...] 
- ) - elif arity == 3: - matchExpr = _FB( - lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr - ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) - if pa: - if isinstance(pa, (tuple, list)): - matchExpr.set_parse_action(*pa) - else: - matchExpr.set_parse_action(pa) - thisExpr <<= (matchExpr | lastExpr).setName(term_name) - lastExpr = thisExpr - ret <<= lastExpr - return ret - - -def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]): - """ - (DEPRECATED - use :class:`IndentedBlock` class instead) - Helper method for defining space-delimited indentation blocks, - such as those used to define block statements in Python source code. - - Parameters: - - - ``blockStatementExpr`` - expression defining syntax of statement that - is repeated within the indented block - - ``indentStack`` - list created by caller to manage indentation stack - (multiple ``statementWithIndentedBlock`` expressions within a single - grammar should share a common ``indentStack``) - - ``indent`` - boolean indicating whether block must be indented beyond - the current level; set to ``False`` for block of left-most statements - (default= ``True``) - - A valid block must contain at least one ``blockStatement``. - - (Note that indentedBlock uses internal parse actions which make it - incompatible with packrat parsing.) - - Example:: - - data = ''' - def A(z): - A1 - B = 100 - G = A2 - A2 - A3 - B - def BB(a,b,c): - BB1 - def BBA(): - bba1 - bba2 - bba3 - C - D - def spam(x,y): - def eggs(z): - pass - ''' - - - indentStack = [1] - stmt = Forward() - - identifier = Word(alphas, alphanums) - funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":") - func_body = indentedBlock(stmt, indentStack) - funcDef = Group(funcDecl + func_body) - - rvalue = Forward() - funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")") - rvalue << (funcCall | identifier | Word(nums)) - assignment = Group(identifier + "=" + rvalue) - stmt << (funcDef | assignment | identifier) - - module_body = stmt[1, ...] 
- - parseTree = module_body.parseString(data) - parseTree.pprint() - - prints:: - - [['def', - 'A', - ['(', 'z', ')'], - ':', - [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]], - 'B', - ['def', - 'BB', - ['(', 'a', 'b', 'c', ')'], - ':', - [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]], - 'C', - 'D', - ['def', - 'spam', - ['(', 'x', 'y', ')'], - ':', - [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] - """ - backup_stacks.append(indentStack[:]) - - def reset_stack(): - indentStack[:] = backup_stacks[-1] - - def checkPeerIndent(s, l, t): - if l >= len(s): - return - curCol = col(l, s) - if curCol != indentStack[-1]: - if curCol > indentStack[-1]: - raise ParseException(s, l, "illegal nesting") - raise ParseException(s, l, "not a peer entry") - - def checkSubIndent(s, l, t): - curCol = col(l, s) - if curCol > indentStack[-1]: - indentStack.append(curCol) - else: - raise ParseException(s, l, "not a subentry") - - def checkUnindent(s, l, t): - if l >= len(s): - return - curCol = col(l, s) - if not (indentStack and curCol in indentStack): - raise ParseException(s, l, "not an unindent") - if curCol < indentStack[-1]: - indentStack.pop() - - NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress()) - INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT") - PEER = Empty().set_parse_action(checkPeerIndent).set_name("") - UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT") - if indent: - smExpr = Group( - Opt(NL) - + INDENT - + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL)) - + UNDENT - ) - else: - smExpr = Group( - Opt(NL) - + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL)) - + Opt(UNDENT) - ) - - # add a parse action to remove backup_stack from list of backups - smExpr.add_parse_action( - lambda: backup_stacks.pop(-1) and None if backup_stacks else None - ) - smExpr.set_fail_action(lambda a, b, c, d: reset_stack()) - blockStatementExpr.ignore(_bslash + LineEnd()) - return smExpr.set_name("indented block") - - -# it's easy to get these comment structures wrong - they're very common, so may as well make them available -c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name( - "C style comment" -) -"Comment of the form ``/* ... */``" - -html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment") -"Comment of the form ``<!-- ... -->``" - -rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line") -dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment") -"Comment of the form ``// ... (to end of line)``" - -cpp_style_comment = Combine( - Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment -).set_name("C++ style comment") -"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`" - -java_style_comment = cpp_style_comment -"Same as :class:`cpp_style_comment`" -
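These comment expressions are typically paired with ``ParserElement.ignore`` so comments may appear anywhere in the input; a minimal sketch (assuming a standalone pyparsing 3.x install; the grammar is illustrative)::

    import pyparsing as pp

    assignment = pp.Word(pp.alphas) + "=" + pp.Word(pp.nums)
    # ignored expressions are skipped wherever they occur between tokens
    assignment.ignore(pp.cpp_style_comment)
    print(assignment.parse_string("x = /* the answer */ 42"))
    # -> ['x', '=', '42']

-python_style_comment = Regex(r"#.*").set_name("Python style comment") -"Comment of the form ``# ... 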
(to end of line)``" - - -# build list of built-in expressions, for future reference if a global default value -# gets updated -_builtin_exprs: List[ParserElement] = [ - v for v in vars().values() if isinstance(v, ParserElement) -] - - -# compatibility function, superseded by DelimitedList class -def delimited_list( - expr: Union[str, ParserElement], - delim: Union[str, ParserElement] = ",", - combine: bool = False, - min: typing.Optional[int] = None, - max: typing.Optional[int] = None, - *, - allow_trailing_delim: bool = False, -) -> ParserElement: - """(DEPRECATED - use :class:`DelimitedList` class)""" - return DelimitedList( - expr, delim, combine, min, max, allow_trailing_delim=allow_trailing_delim - ) - - -# pre-PEP8 compatible names -# fmt: off -opAssoc = OpAssoc -anyOpenTag = any_open_tag -anyCloseTag = any_close_tag -commonHTMLEntity = common_html_entity -cStyleComment = c_style_comment -htmlComment = html_comment -restOfLine = rest_of_line -dblSlashComment = dbl_slash_comment -cppStyleComment = cpp_style_comment -javaStyleComment = java_style_comment -pythonStyleComment = python_style_comment -delimitedList = replaced_by_pep8("delimitedList", DelimitedList) -delimited_list = replaced_by_pep8("delimited_list", DelimitedList) -countedArray = replaced_by_pep8("countedArray", counted_array) -matchPreviousLiteral = replaced_by_pep8("matchPreviousLiteral", match_previous_literal) -matchPreviousExpr = replaced_by_pep8("matchPreviousExpr", match_previous_expr) -oneOf = replaced_by_pep8("oneOf", one_of) -dictOf = replaced_by_pep8("dictOf", dict_of) -originalTextFor = replaced_by_pep8("originalTextFor", original_text_for) -nestedExpr = replaced_by_pep8("nestedExpr", nested_expr) -makeHTMLTags = replaced_by_pep8("makeHTMLTags", make_html_tags) -makeXMLTags = replaced_by_pep8("makeXMLTags", make_xml_tags) -replaceHTMLEntity = replaced_by_pep8("replaceHTMLEntity", replace_html_entity) -infixNotation = replaced_by_pep8("infixNotation", infix_notation) -# fmt: on diff --git a/src/pip/_vendor/pyparsing/py.typed b/src/pip/_vendor/pyparsing/py.typed deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/pip/_vendor/pyparsing/results.py b/src/pip/_vendor/pyparsing/results.py deleted file mode 100644 index 3e5fe2089bd..00000000000 --- a/src/pip/_vendor/pyparsing/results.py +++ /dev/null @@ -1,797 +0,0 @@ -# results.py -from collections.abc import ( - MutableMapping, - Mapping, - MutableSequence, - Iterator, - Sequence, - Container, -) -import pprint -from typing import Tuple, Any, Dict, Set, List - -str_type: Tuple[type, ...] = (str, bytes) -_generator_type = type((_ for _ in ())) - - -class _ParseResultsWithOffset: - tup: Tuple["ParseResults", int] - __slots__ = ["tup"] - - def __init__(self, p1: "ParseResults", p2: int): - self.tup: Tuple[ParseResults, int] = (p1, p2) - - def __getitem__(self, i): - return self.tup[i] - - def __getstate__(self): - return self.tup - - def __setstate__(self, *args): - self.tup = args[0] - - -class ParseResults: - """Structured parse results, to provide multiple means of access to - the parsed data: - - - as a list (``len(results)``) - - by list index (``results[0], results[1]``, etc.) 
- by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`) - - Example:: - - integer = Word(nums) - date_str = (integer.set_results_name("year") + '/' - + integer.set_results_name("month") + '/' - + integer.set_results_name("day")) - # equivalent form: - # date_str = (integer("year") + '/' - # + integer("month") + '/' - # + integer("day")) - - # parse_string returns a ParseResults object - result = date_str.parse_string("1999/12/31") - - def test(s, fn=repr): - print(f"{s} -> {fn(eval(s))}") - test("list(result)") - test("result[0]") - test("result['month']") - test("result.day") - test("'month' in result") - test("'minutes' in result") - test("result.dump()", str) - - prints:: - - list(result) -> ['1999', '/', '12', '/', '31'] - result[0] -> '1999' - result['month'] -> '12' - result.day -> '31' - 'month' in result -> True - 'minutes' in result -> False - result.dump() -> ['1999', '/', '12', '/', '31'] - - day: '31' - - month: '12' - - year: '1999' - """ - - _null_values: Tuple[Any, ...] = (None, [], ()) - - _name: str - _parent: "ParseResults" - _all_names: Set[str] - _modal: bool - _toklist: List[Any] - _tokdict: Dict[str, Any] - - __slots__ = ( - "_name", - "_parent", - "_all_names", - "_modal", - "_toklist", - "_tokdict", - ) - - class List(list): - """ - Simple wrapper class to distinguish parsed list results that should be preserved - as actual Python lists, instead of being converted to :class:`ParseResults`:: - - LBRACK, RBRACK = map(pp.Suppress, "[]") - element = pp.Forward() - item = ppc.integer - element_list = LBRACK + pp.DelimitedList(element) + RBRACK - - # add parse actions to convert from ParseResults to actual Python collection types - def as_python_list(t): - return pp.ParseResults.List(t.as_list()) - element_list.add_parse_action(as_python_list) - - element <<= item | element_list - - element.run_tests(''' - 100 - [2,3,4] - [[2, 1],3,4] - (2,3,4) - ''', post_parse=lambda s, r: (r[0], type(r[0]))) - - prints:: - - 100 - (100, <class 'int'>) - - [2,3,4] - ([2, 3, 4], <class 'list'>) - - [[2, 1],3,4] - ([[2, 1], 3, 4], <class 'list'>) - - (Used internally by :class:`Group` when `aslist=True`.) 
- """ - - def __new__(cls, contained=None): - if contained is None: - contained = [] - - if not isinstance(contained, list): - raise TypeError( - f"{cls.__name__} may only be constructed with a list, not {type(contained).__name__}" - ) - - return list.__new__(cls) - - def __new__(cls, toklist=None, name=None, **kwargs): - if isinstance(toklist, ParseResults): - return toklist - self = object.__new__(cls) - self._name = None - self._parent = None - self._all_names = set() - - if toklist is None: - self._toklist = [] - elif isinstance(toklist, (list, _generator_type)): - self._toklist = ( - [toklist[:]] - if isinstance(toklist, ParseResults.List) - else list(toklist) - ) - else: - self._toklist = [toklist] - self._tokdict = dict() - return self - - # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible - def __init__( - self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance - ): - self._tokdict: Dict[str, _ParseResultsWithOffset] - self._modal = modal - - if name is None or name == "": - return - - if isinstance(name, int): - name = str(name) - - if not modal: - self._all_names = {name} - - self._name = name - - if toklist in self._null_values: - return - - if isinstance(toklist, (str_type, type)): - toklist = [toklist] - - if asList: - if isinstance(toklist, ParseResults): - self[name] = _ParseResultsWithOffset(ParseResults(toklist._toklist), 0) - else: - self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0) - self[name]._name = name - return - - try: - self[name] = toklist[0] - except (KeyError, TypeError, IndexError): - if toklist is not self: - self[name] = toklist - else: - self._name = name - - def __getitem__(self, i): - if isinstance(i, (int, slice)): - return self._toklist[i] - - if i not in self._all_names: - return self._tokdict[i][-1][0] - - return ParseResults([v[0] for v in self._tokdict[i]]) - - def __setitem__(self, k, v, isinstance=isinstance): - if isinstance(v, _ParseResultsWithOffset): - self._tokdict[k] = self._tokdict.get(k, list()) + [v] - sub = v[0] - elif isinstance(k, (int, slice)): - self._toklist[k] = v - sub = v - else: - self._tokdict[k] = self._tokdict.get(k, list()) + [ - _ParseResultsWithOffset(v, 0) - ] - sub = v - if isinstance(sub, ParseResults): - sub._parent = self - - def __delitem__(self, i): - if not isinstance(i, (int, slice)): - del self._tokdict[i] - return - - mylen = len(self._toklist) - del self._toklist[i] - - # convert int to slice - if isinstance(i, int): - if i < 0: - i += mylen - i = slice(i, i + 1) - # get removed indices - removed = list(range(*i.indices(mylen))) - removed.reverse() - # fixup indices in token dictionary - for occurrences in self._tokdict.values(): - for j in removed: - for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset( - value, position - (position > j) - ) - - def __contains__(self, k) -> bool: - return k in self._tokdict - - def __len__(self) -> int: - return len(self._toklist) - - def __bool__(self) -> bool: - return not not (self._toklist or self._tokdict) - - def __iter__(self) -> Iterator: - return iter(self._toklist) - - def __reversed__(self) -> Iterator: - return iter(self._toklist[::-1]) - - def keys(self): - return iter(self._tokdict) - - def values(self): - return (self[k] for k in self.keys()) - - def items(self): - return ((k, self[k]) for k in self.keys()) - - def haskeys(self) -> bool: - """ - Since ``keys()`` returns an iterator, this method is helpful in bypassing - 
code that looks for the existence of any defined results names.""" - return not not self._tokdict - - def pop(self, *args, **kwargs): - """ - Removes and returns item at specified index (default= ``last``). - Supports both ``list`` and ``dict`` semantics for ``pop()``. If - passed no argument or an integer argument, it will use ``list`` - semantics and pop tokens from the list of parsed tokens. If passed - a non-integer argument (most likely a string), it will use ``dict`` - semantics and pop the corresponding value from any defined results - names. A second default return value argument is supported, just as in - ``dict.pop()``. - - Example:: - - numlist = Word(nums)[...] - print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321'] - - def remove_first(tokens): - tokens.pop(0) - numlist.add_parse_action(remove_first) - print(numlist.parse_string("0 123 321")) # -> ['123', '321'] - - label = Word(alphas) - patt = label("LABEL") + Word(nums)[1, ...] - print(patt.parse_string("AAB 123 321").dump()) - - # Use pop() in a parse action to remove named result (note that corresponding value is not - # removed from list form of results) - def remove_LABEL(tokens): - tokens.pop("LABEL") - return tokens - patt.add_parse_action(remove_LABEL) - print(patt.parse_string("AAB 123 321").dump()) - - prints:: - - ['AAB', '123', '321'] - - LABEL: 'AAB' - - ['AAB', '123', '321'] - """ - if not args: - args = [-1] - for k, v in kwargs.items(): - if k == "default": - args = (args[0], v) - else: - raise TypeError(f"pop() got an unexpected keyword argument {k!r}") - if isinstance(args[0], int) or len(args) == 1 or args[0] in self: - index = args[0] - ret = self[index] - del self[index] - return ret - else: - defaultvalue = args[1] - return defaultvalue - - def get(self, key, default_value=None): - """ - Returns named result matching the given key, or if there is no - such name, then returns the given ``default_value`` or ``None`` if no - ``default_value`` is specified. - - Similar to ``dict.get()``. - - Example:: - - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - result = date_str.parse_string("1999/12/31") - print(result.get("year")) # -> '1999' - print(result.get("hour", "not specified")) # -> 'not specified' - print(result.get("hour")) # -> None - """ - if key in self: - return self[key] - else: - return default_value - - def insert(self, index, ins_string): - """ - Inserts new element at location index in the list of parsed tokens. - - Similar to ``list.insert()``. - - Example:: - - numlist = Word(nums)[...] - print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321'] - - # use a parse action to insert the parse location in the front of the parsed results - def insert_locn(locn, tokens): - tokens.insert(0, locn) - numlist.add_parse_action(insert_locn) - print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321'] - """ - self._toklist.insert(index, ins_string) - # fixup indices in token dictionary - for occurrences in self._tokdict.values(): - for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset( - value, position + (position > index) - ) - - def append(self, item): - """ - Add single element to end of ``ParseResults`` list of elements. - - Example:: - - numlist = Word(nums)[...] 
- print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321'] - - # use a parse action to compute the sum of the parsed integers, and add it to the end - def append_sum(tokens): - tokens.append(sum(map(int, tokens))) - numlist.add_parse_action(append_sum) - print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444] - """ - self._toklist.append(item) - - def extend(self, itemseq): - """ - Add sequence of elements to end of ``ParseResults`` list of elements. - - Example:: - - patt = Word(alphas)[1, ...] - - # use a parse action to append the reverse of the matched strings, to make a palindrome - def make_palindrome(tokens): - tokens.extend(reversed([t[::-1] for t in tokens])) - return ''.join(tokens) - patt.add_parse_action(make_palindrome) - print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' - """ - if isinstance(itemseq, ParseResults): - self.__iadd__(itemseq) - else: - self._toklist.extend(itemseq) - - def clear(self): - """ - Clear all elements and results names. - """ - del self._toklist[:] - self._tokdict.clear() - - def __getattr__(self, name): - try: - return self[name] - except KeyError: - if name.startswith("__"): - raise AttributeError(name) - return "" - - def __add__(self, other: "ParseResults") -> "ParseResults": - ret = self.copy() - ret += other - return ret - - def __iadd__(self, other: "ParseResults") -> "ParseResults": - if not other: - return self - - if other._tokdict: - offset = len(self._toklist) - addoffset = lambda a: offset if a < 0 else a + offset - otheritems = other._tokdict.items() - otherdictitems = [ - (k, _ParseResultsWithOffset(v[0], addoffset(v[1]))) - for k, vlist in otheritems - for v in vlist - ] - for k, v in otherdictitems: - self[k] = v - if isinstance(v[0], ParseResults): - v[0]._parent = self - - self._toklist += other._toklist - self._all_names |= other._all_names - return self - - def __radd__(self, other) -> "ParseResults": - if isinstance(other, int) and other == 0: - # useful for merging many ParseResults using sum() builtin - return self.copy() - else: - # this may raise a TypeError - so be it - return other + self - - def __repr__(self) -> str: - return f"{type(self).__name__}({self._toklist!r}, {self.as_dict()})" - - def __str__(self) -> str: - return ( - "[" - + ", ".join( - [ - str(i) if isinstance(i, ParseResults) else repr(i) - for i in self._toklist - ] - ) - + "]" - ) - - def _asStringList(self, sep=""): - out = [] - for item in self._toklist: - if out and sep: - out.append(sep) - if isinstance(item, ParseResults): - out += item._asStringList() - else: - out.append(str(item)) - return out - - def as_list(self) -> list: - """ - Returns the parse results as a nested list of matching tokens, all converted to strings. - - Example:: - - patt = Word(alphas)[1, ...] - result = patt.parse_string("sldkj lsdkj sldkj") - # even though the result prints in string-like form, it is actually a pyparsing ParseResults - print(type(result), result) # -> ['sldkj', 'lsdkj', 'sldkj'] - - # Use as_list() to create an actual list - result_list = result.as_list() - print(type(result_list), result_list) # -> ['sldkj', 'lsdkj', 'sldkj'] - """ - return [ - res.as_list() if isinstance(res, ParseResults) else res - for res in self._toklist - ] - - def as_dict(self) -> dict: - """ - Returns the named parse results as a nested dictionary. 
- - Example:: - - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - result = date_str.parse_string('12/31/1999') - print(type(result), repr(result)) # -> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) - - result_dict = result.as_dict() - print(type(result_dict), repr(result_dict)) # -> {'day': '1999', 'year': '12', 'month': '31'} - - # even though a ParseResults supports dict-like access, sometime you just need to have a dict - import json - print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable - print(json.dumps(result.as_dict())) # -> {"month": "31", "day": "1999", "year": "12"} - """ - - def to_item(obj): - if isinstance(obj, ParseResults): - return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj] - else: - return obj - - return dict((k, to_item(v)) for k, v in self.items()) - - def copy(self) -> "ParseResults": - """ - Returns a new shallow copy of a :class:`ParseResults` object. `ParseResults` - items contained within the source are shared with the copy. Use - :class:`ParseResults.deepcopy()` to create a copy with its own separate - content values. - """ - ret = ParseResults(self._toklist) - ret._tokdict = self._tokdict.copy() - ret._parent = self._parent - ret._all_names |= self._all_names - ret._name = self._name - return ret - - def deepcopy(self) -> "ParseResults": - """ - Returns a new deep copy of a :class:`ParseResults` object. - """ - ret = self.copy() - # replace values with copies if they are of known mutable types - for i, obj in enumerate(self._toklist): - if isinstance(obj, ParseResults): - self._toklist[i] = obj.deepcopy() - elif isinstance(obj, (str, bytes)): - pass - elif isinstance(obj, MutableMapping): - self._toklist[i] = dest = type(obj)() - for k, v in obj.items(): - dest[k] = v.deepcopy() if isinstance(v, ParseResults) else v - elif isinstance(obj, Container): - self._toklist[i] = type(obj)( - v.deepcopy() if isinstance(v, ParseResults) else v for v in obj - ) - return ret - - def get_name(self): - r""" - Returns the results name for this token expression. Useful when several - different expressions might match at a particular location. - - Example:: - - integer = Word(nums) - ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") - house_number_expr = Suppress('#') + Word(nums, alphanums) - user_data = (Group(house_number_expr)("house_number") - | Group(ssn_expr)("ssn") - | Group(integer)("age")) - user_info = user_data[1, ...] - - result = user_info.parse_string("22 111-22-3333 #221B") - for item in result: - print(item.get_name(), ':', item[0]) - - prints:: - - age : 22 - ssn : 111-22-3333 - house_number : 221B - """ - if self._name: - return self._name - elif self._parent: - par: "ParseResults" = self._parent - parent_tokdict_items = par._tokdict.items() - return next( - ( - k - for k, vlist in parent_tokdict_items - for v, loc in vlist - if v is self - ), - None, - ) - elif ( - len(self) == 1 - and len(self._tokdict) == 1 - and next(iter(self._tokdict.values()))[0][1] in (0, -1) - ): - return next(iter(self._tokdict.keys())) - else: - return None - - def dump(self, indent="", full=True, include_list=True, _depth=0) -> str: - """ - Diagnostic method for listing out the contents of - a :class:`ParseResults`. Accepts an optional ``indent`` argument so - that this string can be embedded in a nested display of other data. 
- - Example:: - - integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - - result = date_str.parse_string('1999/12/31') - print(result.dump()) - - prints:: - - ['1999', '/', '12', '/', '31'] - - day: '31' - - month: '12' - - year: '1999' - """ - out = [] - NL = "\n" - out.append(indent + str(self.as_list()) if include_list else "") - - if not full: - return "".join(out) - - if self.haskeys(): - items = sorted((str(k), v) for k, v in self.items()) - for k, v in items: - if out: - out.append(NL) - out.append(f"{indent}{(' ' * _depth)}- {k}: ") - if not isinstance(v, ParseResults): - out.append(repr(v)) - continue - - if not v: - out.append(str(v)) - continue - - out.append( - v.dump( - indent=indent, - full=full, - include_list=include_list, - _depth=_depth + 1, - ) - ) - if not any(isinstance(vv, ParseResults) for vv in self): - return "".join(out) - - v = self - incr = " " - nl = "\n" - for i, vv in enumerate(v): - if isinstance(vv, ParseResults): - vv_dump = vv.dump( - indent=indent, - full=full, - include_list=include_list, - _depth=_depth + 1, - ) - out.append( - f"{nl}{indent}{incr * _depth}[{i}]:{nl}{indent}{incr * (_depth + 1)}{vv_dump}" - ) - else: - out.append( - f"{nl}{indent}{incr * _depth}[{i}]:{nl}{indent}{incr * (_depth + 1)}{vv}" - ) - - return "".join(out) - - def pprint(self, *args, **kwargs): - """ - Pretty-printer for parsed results as a list, using the - `pprint `_ module. - Accepts additional positional or keyword args as defined for - `pprint.pprint `_ . - - Example:: - - ident = Word(alphas, alphanums) - num = Word(nums) - func = Forward() - term = ident | num | Group('(' + func + ')') - func <<= ident + Group(Optional(DelimitedList(term))) - result = func.parse_string("fna a,b,(fnb c,d,200),100") - result.pprint(width=40) - - prints:: - - ['fna', - ['a', - 'b', - ['(', 'fnb', ['c', 'd', '200'], ')'], - '100']] - """ - pprint.pprint(self.as_list(), *args, **kwargs) - - # add support for pickle protocol - def __getstate__(self): - return ( - self._toklist, - ( - self._tokdict.copy(), - None, - self._all_names, - self._name, - ), - ) - - def __setstate__(self, state): - self._toklist, (self._tokdict, par, inAccumNames, self._name) = state - self._all_names = set(inAccumNames) - self._parent = None - - def __getnewargs__(self): - return self._toklist, self._name - - def __dir__(self): - return dir(type(self)) + list(self.keys()) - - @classmethod - def from_dict(cls, other, name=None) -> "ParseResults": - """ - Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the - name-value relations as results names. If an optional ``name`` argument is - given, a nested ``ParseResults`` will be returned. 
- """ - - def is_iterable(obj): - try: - iter(obj) - except Exception: - return False - # str's are iterable, but in pyparsing, we don't want to iterate over them - else: - return not isinstance(obj, str_type) - - ret = cls([]) - for k, v in other.items(): - if isinstance(v, Mapping): - ret += cls.from_dict(v, name=k) - else: - ret += cls([v], name=k, asList=is_iterable(v)) - if name is not None: - ret = cls([ret], name=name) - return ret - - asList = as_list - """Deprecated - use :class:`as_list`""" - asDict = as_dict - """Deprecated - use :class:`as_dict`""" - getName = get_name - """Deprecated - use :class:`get_name`""" - - -MutableMapping.register(ParseResults) -MutableSequence.register(ParseResults) diff --git a/src/pip/_vendor/pyparsing/testing.py b/src/pip/_vendor/pyparsing/testing.py deleted file mode 100644 index 5654d47d62d..00000000000 --- a/src/pip/_vendor/pyparsing/testing.py +++ /dev/null @@ -1,345 +0,0 @@ -# testing.py - -from contextlib import contextmanager -import re -import typing - - -from .core import ( - ParserElement, - ParseException, - Keyword, - __diag__, - __compat__, -) - - -class pyparsing_test: - """ - namespace class for classes useful in writing unit tests - """ - - class reset_pyparsing_context: - """ - Context manager to be used when writing unit tests that modify pyparsing config values: - - packrat parsing - - bounded recursion parsing - - default whitespace characters. - - default keyword characters - - literal string auto-conversion class - - __diag__ settings - - Example:: - - with reset_pyparsing_context(): - # test that literals used to construct a grammar are automatically suppressed - ParserElement.inlineLiteralsUsing(Suppress) - - term = Word(alphas) | Word(nums) - group = Group('(' + term[...] + ')') - - # assert that the '()' characters are not included in the parsed tokens - self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def']) - - # after exiting context manager, literals are converted to Literal expressions again - """ - - def __init__(self): - self._save_context = {} - - def save(self): - self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS - self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS - - self._save_context["literal_string_class"] = ( - ParserElement._literalStringClass - ) - - self._save_context["verbose_stacktrace"] = ParserElement.verbose_stacktrace - - self._save_context["packrat_enabled"] = ParserElement._packratEnabled - if ParserElement._packratEnabled: - self._save_context["packrat_cache_size"] = ( - ParserElement.packrat_cache.size - ) - else: - self._save_context["packrat_cache_size"] = None - self._save_context["packrat_parse"] = ParserElement._parse - self._save_context["recursion_enabled"] = ( - ParserElement._left_recursion_enabled - ) - - self._save_context["__diag__"] = { - name: getattr(__diag__, name) for name in __diag__._all_names - } - - self._save_context["__compat__"] = { - "collect_all_And_tokens": __compat__.collect_all_And_tokens - } - - return self - - def restore(self): - # reset pyparsing global state - if ( - ParserElement.DEFAULT_WHITE_CHARS - != self._save_context["default_whitespace"] - ): - ParserElement.set_default_whitespace_chars( - self._save_context["default_whitespace"] - ) - - ParserElement.verbose_stacktrace = self._save_context["verbose_stacktrace"] - - Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"] - ParserElement.inlineLiteralsUsing( - self._save_context["literal_string_class"] - ) - - for 
name, value in self._save_context["__diag__"].items(): - (__diag__.enable if value else __diag__.disable)(name) - - ParserElement._packratEnabled = False - if self._save_context["packrat_enabled"]: - ParserElement.enable_packrat(self._save_context["packrat_cache_size"]) - else: - ParserElement._parse = self._save_context["packrat_parse"] - ParserElement._left_recursion_enabled = self._save_context[ - "recursion_enabled" - ] - - __compat__.collect_all_And_tokens = self._save_context["__compat__"] - - return self - - def copy(self): - ret = type(self)() - ret._save_context.update(self._save_context) - return ret - - def __enter__(self): - return self.save() - - def __exit__(self, *args): - self.restore() - - class TestParseResultsAsserts: - """ - A mixin class to add parse results assertion methods to normal unittest.TestCase classes. - """ - - def assertParseResultsEquals( - self, result, expected_list=None, expected_dict=None, msg=None - ): - """ - Unit test assertion to compare a :class:`ParseResults` object with an optional ``expected_list``, - and compare any defined results names with an optional ``expected_dict``. - """ - if expected_list is not None: - self.assertEqual(expected_list, result.as_list(), msg=msg) - if expected_dict is not None: - self.assertEqual(expected_dict, result.as_dict(), msg=msg) - - def assertParseAndCheckList( - self, expr, test_string, expected_list, msg=None, verbose=True - ): - """ - Convenience wrapper assert to test a parser element and input string, and assert that - the resulting ``ParseResults.asList()`` is equal to the ``expected_list``. - """ - result = expr.parse_string(test_string, parse_all=True) - if verbose: - print(result.dump()) - else: - print(result.as_list()) - self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg) - - def assertParseAndCheckDict( - self, expr, test_string, expected_dict, msg=None, verbose=True - ): - """ - Convenience wrapper assert to test a parser element and input string, and assert that - the resulting ``ParseResults.asDict()`` is equal to the ``expected_dict``. - """ - result = expr.parse_string(test_string, parseAll=True) - if verbose: - print(result.dump()) - else: - print(result.as_list()) - self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg) - - def assertRunTestResults( - self, run_tests_report, expected_parse_results=None, msg=None - ): - """ - Unit test assertion to evaluate output of ``ParserElement.runTests()``. If a list of - list-dict tuples is given as the ``expected_parse_results`` argument, then these are zipped - with the report tuples returned by ``runTests`` and evaluated using ``assertParseResultsEquals``. - Finally, asserts that the overall ``runTests()`` success value is ``True``. 
- - :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests - :param expected_parse_results (optional): [tuple(str, list, dict, Exception)] - """ - run_test_success, run_test_results = run_tests_report - - if expected_parse_results is None: - self.assertTrue( - run_test_success, msg=msg if msg is not None else "failed runTests" - ) - return - - merged = [ - (*rpt, expected) - for rpt, expected in zip(run_test_results, expected_parse_results) - ] - for test_string, result, expected in merged: - # expected should be a tuple containing a list and/or a dict or an exception, - # and optional failure message string - # an empty tuple will skip any result validation - fail_msg = next((exp for exp in expected if isinstance(exp, str)), None) - expected_exception = next( - ( - exp - for exp in expected - if isinstance(exp, type) and issubclass(exp, Exception) - ), - None, - ) - if expected_exception is not None: - with self.assertRaises( - expected_exception=expected_exception, msg=fail_msg or msg - ): - if isinstance(result, Exception): - raise result - else: - expected_list = next( - (exp for exp in expected if isinstance(exp, list)), None - ) - expected_dict = next( - (exp for exp in expected if isinstance(exp, dict)), None - ) - if (expected_list, expected_dict) != (None, None): - self.assertParseResultsEquals( - result, - expected_list=expected_list, - expected_dict=expected_dict, - msg=fail_msg or msg, - ) - else: - # warning here maybe? - print(f"no validation for {test_string!r}") - - # do this last, in case some specific test results can be reported instead - self.assertTrue( - run_test_success, msg=msg if msg is not None else "failed runTests" - ) - - @contextmanager - def assertRaisesParseException( - self, exc_type=ParseException, expected_msg=None, msg=None - ): - if expected_msg is not None: - if isinstance(expected_msg, str): - expected_msg = re.escape(expected_msg) - with self.assertRaisesRegex(exc_type, expected_msg, msg=msg) as ctx: - yield ctx - - else: - with self.assertRaises(exc_type, msg=msg) as ctx: - yield ctx - - @staticmethod - def with_line_numbers( - s: str, - start_line: typing.Optional[int] = None, - end_line: typing.Optional[int] = None, - expand_tabs: bool = True, - eol_mark: str = "|", - mark_spaces: typing.Optional[str] = None, - mark_control: typing.Optional[str] = None, - ) -> str: - """ - Helpful method for debugging a parser - prints a string with line and column numbers. - (Line and column numbers are 1-based.) 
- - :param s: tuple(bool, str - string to be printed with line and column numbers - :param start_line: int - (optional) starting line number in s to print (default=1) - :param end_line: int - (optional) ending line number in s to print (default=len(s)) - :param expand_tabs: bool - (optional) expand tabs to spaces, to match the pyparsing default - :param eol_mark: str - (optional) string to mark the end of lines, helps visualize trailing spaces (default="|") - :param mark_spaces: str - (optional) special character to display in place of spaces - :param mark_control: str - (optional) convert non-printing control characters to a placeholding - character; valid values: - - "unicode" - replaces control chars with Unicode symbols, such as "␍" and "␊" - - any single character string - replace control characters with given string - - None (default) - string is displayed as-is - - :return: str - input string with leading line numbers and column number headers - """ - if expand_tabs: - s = s.expandtabs() - if mark_control is not None: - mark_control = typing.cast(str, mark_control) - if mark_control == "unicode": - transtable_map = { - c: u for c, u in zip(range(0, 33), range(0x2400, 0x2433)) - } - transtable_map[127] = 0x2421 - tbl = str.maketrans(transtable_map) - eol_mark = "" - else: - ord_mark_control = ord(mark_control) - tbl = str.maketrans( - {c: ord_mark_control for c in list(range(0, 32)) + [127]} - ) - s = s.translate(tbl) - if mark_spaces is not None and mark_spaces != " ": - if mark_spaces == "unicode": - tbl = str.maketrans({9: 0x2409, 32: 0x2423}) - s = s.translate(tbl) - else: - s = s.replace(" ", mark_spaces) - if start_line is None: - start_line = 1 - if end_line is None: - end_line = len(s) - end_line = min(end_line, len(s)) - start_line = min(max(1, start_line), end_line) - - if mark_control != "unicode": - s_lines = s.splitlines()[start_line - 1 : end_line] - else: - s_lines = [line + "␊" for line in s.split("␊")[start_line - 1 : end_line]] - if not s_lines: - return "" - - lineno_width = len(str(end_line)) - max_line_len = max(len(line) for line in s_lines) - lead = " " * (lineno_width + 1) - if max_line_len >= 99: - header0 = ( - lead - + "".join( - f"{' ' * 99}{(i + 1) % 100}" - for i in range(max(max_line_len // 100, 1)) - ) - + "\n" - ) - else: - header0 = "" - header1 = ( - header0 - + lead - + "".join(f" {(i + 1) % 10}" for i in range(-(-max_line_len // 10))) - + "\n" - ) - header2 = lead + "1234567890" * (-(-max_line_len // 10)) + "\n" - return ( - header1 - + header2 - + "\n".join( - f"{i:{lineno_width}d}:{line}{eol_mark}" - for i, line in enumerate(s_lines, start=start_line) - ) - + "\n" - ) diff --git a/src/pip/_vendor/pyparsing/unicode.py b/src/pip/_vendor/pyparsing/unicode.py deleted file mode 100644 index e0b7b4ccb9a..00000000000 --- a/src/pip/_vendor/pyparsing/unicode.py +++ /dev/null @@ -1,354 +0,0 @@ -# unicode.py - -import sys -from itertools import filterfalse -from typing import List, Tuple, Union - - -class _lazyclassproperty: - def __init__(self, fn): - self.fn = fn - self.__doc__ = fn.__doc__ - self.__name__ = fn.__name__ - - def __get__(self, obj, cls): - if cls is None: - cls = type(obj) - if not hasattr(cls, "_intern") or any( - cls._intern is getattr(superclass, "_intern", []) - for superclass in cls.__mro__[1:] - ): - cls._intern = {} - attrname = self.fn.__name__ - if attrname not in cls._intern: - cls._intern[attrname] = self.fn(cls) - return cls._intern[attrname] - - -UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]] - - -class 
unicode_set: - """ - A set of Unicode characters, for language-specific strings for - ``alphas``, ``nums``, ``alphanums``, and ``printables``. - A unicode_set is defined by a list of ranges in the Unicode character - set, in a class attribute ``_ranges``. Ranges can be specified using - 2-tuples or a 1-tuple, such as:: - - _ranges = [ - (0x0020, 0x007e), - (0x00a0, 0x00ff), - (0x0100,), - ] - - Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x). - - A unicode set can also be defined using multiple inheritance of other unicode sets:: - - class CJK(Chinese, Japanese, Korean): - pass - """ - - _ranges: UnicodeRangeList = [] - - @_lazyclassproperty - def _chars_for_ranges(cls): - ret = [] - for cc in cls.__mro__: - if cc is unicode_set: - break - for rr in getattr(cc, "_ranges", ()): - ret.extend(range(rr[0], rr[-1] + 1)) - return [chr(c) for c in sorted(set(ret))] - - @_lazyclassproperty - def printables(cls): - """all non-whitespace characters in this range""" - return "".join(filterfalse(str.isspace, cls._chars_for_ranges)) - - @_lazyclassproperty - def alphas(cls): - """all alphabetic characters in this range""" - return "".join(filter(str.isalpha, cls._chars_for_ranges)) - - @_lazyclassproperty - def nums(cls): - """all numeric digit characters in this range""" - return "".join(filter(str.isdigit, cls._chars_for_ranges)) - - @_lazyclassproperty - def alphanums(cls): - """all alphanumeric characters in this range""" - return cls.alphas + cls.nums - - @_lazyclassproperty - def identchars(cls): - """all characters in this range that are valid identifier characters, plus underscore '_'""" - return "".join( - sorted( - set( - "".join(filter(str.isidentifier, cls._chars_for_ranges)) - + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº" - + "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ" - + "_" - ) - ) - ) - - @_lazyclassproperty - def identbodychars(cls): - """ - all characters in this range that are valid identifier body characters, - plus the digits 0-9, and · (Unicode MIDDLE DOT) - """ - identifier_chars = set( - c for c in cls._chars_for_ranges if ("_" + c).isidentifier() - ) - return "".join(sorted(identifier_chars | set(cls.identchars + "0123456789·"))) - - @_lazyclassproperty - def identifier(cls): - """ - a pyparsing Word expression for an identifier using this range's definitions for - identchars and identbodychars - """ - from pip._vendor.pyparsing import Word - - return Word(cls.identchars, cls.identbodychars) - - -class pyparsing_unicode(unicode_set): - """ - A namespace class for defining common language unicode_sets. 
- """ - - # fmt: off - - # define ranges in language character sets - _ranges: UnicodeRangeList = [ - (0x0020, sys.maxunicode), - ] - - class BasicMultilingualPlane(unicode_set): - """Unicode set for the Basic Multilingual Plane""" - _ranges: UnicodeRangeList = [ - (0x0020, 0xFFFF), - ] - - class Latin1(unicode_set): - """Unicode set for Latin-1 Unicode Character Range""" - _ranges: UnicodeRangeList = [ - (0x0020, 0x007E), - (0x00A0, 0x00FF), - ] - - class LatinA(unicode_set): - """Unicode set for Latin-A Unicode Character Range""" - _ranges: UnicodeRangeList = [ - (0x0100, 0x017F), - ] - - class LatinB(unicode_set): - """Unicode set for Latin-B Unicode Character Range""" - _ranges: UnicodeRangeList = [ - (0x0180, 0x024F), - ] - - class Greek(unicode_set): - """Unicode set for Greek Unicode Character Ranges""" - _ranges: UnicodeRangeList = [ - (0x0342, 0x0345), - (0x0370, 0x0377), - (0x037A, 0x037F), - (0x0384, 0x038A), - (0x038C,), - (0x038E, 0x03A1), - (0x03A3, 0x03E1), - (0x03F0, 0x03FF), - (0x1D26, 0x1D2A), - (0x1D5E,), - (0x1D60,), - (0x1D66, 0x1D6A), - (0x1F00, 0x1F15), - (0x1F18, 0x1F1D), - (0x1F20, 0x1F45), - (0x1F48, 0x1F4D), - (0x1F50, 0x1F57), - (0x1F59,), - (0x1F5B,), - (0x1F5D,), - (0x1F5F, 0x1F7D), - (0x1F80, 0x1FB4), - (0x1FB6, 0x1FC4), - (0x1FC6, 0x1FD3), - (0x1FD6, 0x1FDB), - (0x1FDD, 0x1FEF), - (0x1FF2, 0x1FF4), - (0x1FF6, 0x1FFE), - (0x2129,), - (0x2719, 0x271A), - (0xAB65,), - (0x10140, 0x1018D), - (0x101A0,), - (0x1D200, 0x1D245), - (0x1F7A1, 0x1F7A7), - ] - - class Cyrillic(unicode_set): - """Unicode set for Cyrillic Unicode Character Range""" - _ranges: UnicodeRangeList = [ - (0x0400, 0x052F), - (0x1C80, 0x1C88), - (0x1D2B,), - (0x1D78,), - (0x2DE0, 0x2DFF), - (0xA640, 0xA672), - (0xA674, 0xA69F), - (0xFE2E, 0xFE2F), - ] - - class Chinese(unicode_set): - """Unicode set for Chinese Unicode Character Range""" - _ranges: UnicodeRangeList = [ - (0x2E80, 0x2E99), - (0x2E9B, 0x2EF3), - (0x31C0, 0x31E3), - (0x3400, 0x4DB5), - (0x4E00, 0x9FEF), - (0xA700, 0xA707), - (0xF900, 0xFA6D), - (0xFA70, 0xFAD9), - (0x16FE2, 0x16FE3), - (0x1F210, 0x1F212), - (0x1F214, 0x1F23B), - (0x1F240, 0x1F248), - (0x20000, 0x2A6D6), - (0x2A700, 0x2B734), - (0x2B740, 0x2B81D), - (0x2B820, 0x2CEA1), - (0x2CEB0, 0x2EBE0), - (0x2F800, 0x2FA1D), - ] - - class Japanese(unicode_set): - """Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges""" - - class Kanji(unicode_set): - "Unicode set for Kanji Unicode Character Range" - _ranges: UnicodeRangeList = [ - (0x4E00, 0x9FBF), - (0x3000, 0x303F), - ] - - class Hiragana(unicode_set): - """Unicode set for Hiragana Unicode Character Range""" - _ranges: UnicodeRangeList = [ - (0x3041, 0x3096), - (0x3099, 0x30A0), - (0x30FC,), - (0xFF70,), - (0x1B001,), - (0x1B150, 0x1B152), - (0x1F200,), - ] - - class Katakana(unicode_set): - """Unicode set for Katakana Unicode Character Range""" - _ranges: UnicodeRangeList = [ - (0x3099, 0x309C), - (0x30A0, 0x30FF), - (0x31F0, 0x31FF), - (0x32D0, 0x32FE), - (0xFF65, 0xFF9F), - (0x1B000,), - (0x1B164, 0x1B167), - (0x1F201, 0x1F202), - (0x1F213,), - ] - - 漢字 = Kanji - カタカナ = Katakana - ひらがな = Hiragana - - _ranges = ( - Kanji._ranges - + Hiragana._ranges - + Katakana._ranges - ) - - class Hangul(unicode_set): - """Unicode set for Hangul (Korean) Unicode Character Range""" - _ranges: UnicodeRangeList = [ - (0x1100, 0x11FF), - (0x302E, 0x302F), - (0x3131, 0x318E), - (0x3200, 0x321C), - (0x3260, 0x327B), - (0x327E,), - (0xA960, 0xA97C), - (0xAC00, 0xD7A3), - (0xD7B0, 0xD7C6), - (0xD7CB, 
0xD7FB), - (0xFFA0, 0xFFBE), - (0xFFC2, 0xFFC7), - (0xFFCA, 0xFFCF), - (0xFFD2, 0xFFD7), - (0xFFDA, 0xFFDC), - ] - - Korean = Hangul - - class CJK(Chinese, Japanese, Hangul): - """Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range""" - - class Thai(unicode_set): - """Unicode set for Thai Unicode Character Range""" - _ranges: UnicodeRangeList = [ - (0x0E01, 0x0E3A), - (0x0E3F, 0x0E5B) - ] - - class Arabic(unicode_set): - """Unicode set for Arabic Unicode Character Range""" - _ranges: UnicodeRangeList = [ - (0x0600, 0x061B), - (0x061E, 0x06FF), - (0x0700, 0x077F), - ] - - class Hebrew(unicode_set): - """Unicode set for Hebrew Unicode Character Range""" - _ranges: UnicodeRangeList = [ - (0x0591, 0x05C7), - (0x05D0, 0x05EA), - (0x05EF, 0x05F4), - (0xFB1D, 0xFB36), - (0xFB38, 0xFB3C), - (0xFB3E,), - (0xFB40, 0xFB41), - (0xFB43, 0xFB44), - (0xFB46, 0xFB4F), - ] - - class Devanagari(unicode_set): - """Unicode set for Devanagari Unicode Character Range""" - _ranges: UnicodeRangeList = [ - (0x0900, 0x097F), - (0xA8E0, 0xA8FF) - ] - - BMP = BasicMultilingualPlane - - # add language identifiers using language Unicode - العربية = Arabic - 中文 = Chinese - кириллица = Cyrillic - Ελληνικά = Greek - עִברִית = Hebrew - 日本語 = Japanese - 한국어 = Korean - ไทย = Thai - देवनागरी = Devanagari - - # fmt: on diff --git a/src/pip/_vendor/pyparsing/util.py b/src/pip/_vendor/pyparsing/util.py deleted file mode 100644 index 4ae018a9636..00000000000 --- a/src/pip/_vendor/pyparsing/util.py +++ /dev/null @@ -1,277 +0,0 @@ -# util.py -import inspect -import warnings -import types -import collections -import itertools -from functools import lru_cache, wraps -from typing import Callable, List, Union, Iterable, TypeVar, cast - -_bslash = chr(92) -C = TypeVar("C", bound=Callable) - - -class __config_flags: - """Internal class for defining compatibility and debugging flags""" - - _all_names: List[str] = [] - _fixed_names: List[str] = [] - _type_desc = "configuration" - - @classmethod - def _set(cls, dname, value): - if dname in cls._fixed_names: - warnings.warn( - f"{cls.__name__}.{dname} {cls._type_desc} is {str(getattr(cls, dname)).upper()}" - f" and cannot be overridden", - stacklevel=3, - ) - return - if dname in cls._all_names: - setattr(cls, dname, value) - else: - raise ValueError(f"no such {cls._type_desc} {dname!r}") - - enable = classmethod(lambda cls, name: cls._set(name, True)) - disable = classmethod(lambda cls, name: cls._set(name, False)) - - -@lru_cache(maxsize=128) -def col(loc: int, strg: str) -> int: - """ - Returns current column within a string, counting newlines as line separators. - The first column is number 1. - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See - :class:`ParserElement.parse_string` for more - information on parsing strings containing ```` s, and suggested - methods to maintain a consistent view of the parsed string, the parse - location, and line and column positions within the parsed string. - """ - s = strg - return 1 if 0 < loc < len(s) and s[loc - 1] == "\n" else loc - s.rfind("\n", 0, loc) - - -@lru_cache(maxsize=128) -def lineno(loc: int, strg: str) -> int: - """Returns current line number within a string, counting newlines as line separators. - The first line is number 1. - - Note - the default parsing behavior is to expand tabs in the input string - before starting the parsing process. 
See :class:`ParserElement.parse_string` - for more information on parsing strings containing ```` s, and - suggested methods to maintain a consistent view of the parsed string, the - parse location, and line and column positions within the parsed string. - """ - return strg.count("\n", 0, loc) + 1 - - -@lru_cache(maxsize=128) -def line(loc: int, strg: str) -> str: - """ - Returns the line of text containing loc within a string, counting newlines as line separators. - """ - last_cr = strg.rfind("\n", 0, loc) - next_cr = strg.find("\n", loc) - return strg[last_cr + 1 : next_cr] if next_cr >= 0 else strg[last_cr + 1 :] - - -class _UnboundedCache: - def __init__(self): - cache = {} - cache_get = cache.get - self.not_in_cache = not_in_cache = object() - - def get(_, key): - return cache_get(key, not_in_cache) - - def set_(_, key, value): - cache[key] = value - - def clear(_): - cache.clear() - - self.size = None - self.get = types.MethodType(get, self) - self.set = types.MethodType(set_, self) - self.clear = types.MethodType(clear, self) - - -class _FifoCache: - def __init__(self, size): - self.not_in_cache = not_in_cache = object() - cache = {} - keyring = [object()] * size - cache_get = cache.get - cache_pop = cache.pop - keyiter = itertools.cycle(range(size)) - - def get(_, key): - return cache_get(key, not_in_cache) - - def set_(_, key, value): - cache[key] = value - i = next(keyiter) - cache_pop(keyring[i], None) - keyring[i] = key - - def clear(_): - cache.clear() - keyring[:] = [object()] * size - - self.size = size - self.get = types.MethodType(get, self) - self.set = types.MethodType(set_, self) - self.clear = types.MethodType(clear, self) - - -class LRUMemo: - """ - A memoizing mapping that retains `capacity` deleted items - - The memo tracks retained items by their access order; once `capacity` items - are retained, the least recently used item is discarded. 
- """ - - def __init__(self, capacity): - self._capacity = capacity - self._active = {} - self._memory = collections.OrderedDict() - - def __getitem__(self, key): - try: - return self._active[key] - except KeyError: - self._memory.move_to_end(key) - return self._memory[key] - - def __setitem__(self, key, value): - self._memory.pop(key, None) - self._active[key] = value - - def __delitem__(self, key): - try: - value = self._active.pop(key) - except KeyError: - pass - else: - while len(self._memory) >= self._capacity: - self._memory.popitem(last=False) - self._memory[key] = value - - def clear(self): - self._active.clear() - self._memory.clear() - - -class UnboundedMemo(dict): - """ - A memoizing mapping that retains all deleted items - """ - - def __delitem__(self, key): - pass - - -def _escape_regex_range_chars(s: str) -> str: - # escape these chars: ^-[] - for c in r"\^-[]": - s = s.replace(c, _bslash + c) - s = s.replace("\n", r"\n") - s = s.replace("\t", r"\t") - return str(s) - - -def _collapse_string_to_ranges( - s: Union[str, Iterable[str]], re_escape: bool = True -) -> str: - def is_consecutive(c): - c_int = ord(c) - is_consecutive.prev, prev = c_int, is_consecutive.prev - if c_int - prev > 1: - is_consecutive.value = next(is_consecutive.counter) - return is_consecutive.value - - is_consecutive.prev = 0 # type: ignore [attr-defined] - is_consecutive.counter = itertools.count() # type: ignore [attr-defined] - is_consecutive.value = -1 # type: ignore [attr-defined] - - def escape_re_range_char(c): - return "\\" + c if c in r"\^-][" else c - - def no_escape_re_range_char(c): - return c - - if not re_escape: - escape_re_range_char = no_escape_re_range_char - - ret = [] - s = "".join(sorted(set(s))) - if len(s) > 3: - for _, chars in itertools.groupby(s, key=is_consecutive): - first = last = next(chars) - last = collections.deque( - itertools.chain(iter([last]), chars), maxlen=1 - ).pop() - if first == last: - ret.append(escape_re_range_char(first)) - else: - sep = "" if ord(last) == ord(first) + 1 else "-" - ret.append( - f"{escape_re_range_char(first)}{sep}{escape_re_range_char(last)}" - ) - else: - ret = [escape_re_range_char(c) for c in s] - - return "".join(ret) - - -def _flatten(ll: list) -> list: - ret = [] - for i in ll: - if isinstance(i, list): - ret.extend(_flatten(i)) - else: - ret.append(i) - return ret - - -def replaced_by_pep8(compat_name: str, fn: C) -> C: - # In a future version, uncomment the code in the internal _inner() functions - # to begin emitting DeprecationWarnings. - - # Unwrap staticmethod/classmethod - fn = getattr(fn, "__func__", fn) - - # (Presence of 'self' arg in signature is used by explain_exception() methods, so we take - # some extra steps to add it if present in decorated function.) 
- if "self" == list(inspect.signature(fn).parameters)[0]: - - @wraps(fn) - def _inner(self, *args, **kwargs): - # warnings.warn( - # f"Deprecated - use {fn.__name__}", DeprecationWarning, stacklevel=2 - # ) - return fn(self, *args, **kwargs) - - else: - - @wraps(fn) - def _inner(*args, **kwargs): - # warnings.warn( - # f"Deprecated - use {fn.__name__}", DeprecationWarning, stacklevel=2 - # ) - return fn(*args, **kwargs) - - _inner.__doc__ = f"""Deprecated - use :class:`{fn.__name__}`""" - _inner.__name__ = compat_name - _inner.__annotations__ = fn.__annotations__ - if isinstance(fn, types.FunctionType): - _inner.__kwdefaults__ = fn.__kwdefaults__ - elif isinstance(fn, type) and hasattr(fn, "__init__"): - _inner.__kwdefaults__ = fn.__init__.__kwdefaults__ - else: - _inner.__kwdefaults__ = None - _inner.__qualname__ = fn.__qualname__ - return cast(C, _inner) diff --git a/src/pip/_vendor/vendor.txt b/src/pip/_vendor/vendor.txt index e9694adc037..e433acf8e04 100644 --- a/src/pip/_vendor/vendor.txt +++ b/src/pip/_vendor/vendor.txt @@ -3,9 +3,8 @@ colorama==0.4.6 distlib==0.3.8 distro==1.9.0 msgpack==1.0.8 -packaging==21.3 +packaging==24.0 platformdirs==4.2.1 -pyparsing==3.1.2 pyproject-hooks==1.0.0 requests==2.31.0 certifi==2024.2.2 diff --git a/tests/data/indexes/invalid-version/invalid-version/index.html b/tests/data/indexes/invalid-version/invalid-version/index.html new file mode 100644 index 00000000000..db9c47fed4b --- /dev/null +++ b/tests/data/indexes/invalid-version/invalid-version/index.html @@ -0,0 +1,7 @@ + + + + invalid_version-2010i-py3-none-any.whl + invalid_version-1.0-py3-none-any.whl + + diff --git a/tests/data/indexes/require-invalid-version/invalid-version/index.html b/tests/data/indexes/require-invalid-version/invalid-version/index.html new file mode 100644 index 00000000000..b1ff5550ea1 --- /dev/null +++ b/tests/data/indexes/require-invalid-version/invalid-version/index.html @@ -0,0 +1,6 @@ + + + + invalid_version-2010i-py3-none-any.whl + + diff --git a/tests/data/indexes/require-invalid-version/require-invalid-version/index.html b/tests/data/indexes/require-invalid-version/require-invalid-version/index.html new file mode 100644 index 00000000000..4b3300db082 --- /dev/null +++ b/tests/data/indexes/require-invalid-version/require-invalid-version/index.html @@ -0,0 +1,8 @@ + + + + require_invalid_version-0.1-py3-none-any.whl + require_invalid_version-1.0-py3-none-any.whl + require_invalid_version-1.0.tar.gz + + diff --git a/tests/data/packages/README.txt b/tests/data/packages/README.txt index aa957b337f0..b957f158a3b 100644 --- a/tests/data/packages/README.txt +++ b/tests/data/packages/README.txt @@ -6,9 +6,9 @@ broken-0.1.tar.gz ----------------- This package exists for testing uninstall-rollback. -broken-0.2broken.tar.gz ------------------------ -Version 0.2broken has a setup.py crafted to fail on install (and only on +broken-0.2+broken.tar.gz +------------------------ +Version 0.2+broken has a setup.py crafted to fail on install (and only on install). If any earlier step would fail (i.e. egg-info-generation), the already-installed version would never be uninstalled, so uninstall-rollback would not come into play. @@ -104,3 +104,24 @@ require_simple-1.0.tar.gz ------------------------ contains "require_simple" package which requires simple>=2.0 - used for testing if dependencies are handled correctly. + +invalid_version-2010i-py3-none-any.whl +-------------------------------------- +The invalid-version package with a legacy version. 
+ +invalid_version-1.0-py3-none-any.whl +------------------------------------ +A valid variant of the invalid-version package. Used, for instance, to test that the +version with invalid metadata is ignored in favor of this one. + +require_invalid_version-0.1-py3-none-any.whl +-------------------------------------------- +A valid variant of the require-invalid-version package. + +require_invalid_version-1.0-py3-none-any.whl +-------------------------------------------- +This package has an invalid version specifier in its requirements. + +require_invalid_version-1.0.tar.gz +---------------------------------- +This package has an invalid version specifier in its requirements. diff --git a/tests/data/packages/broken-0.2broken.tar.gz b/tests/data/packages/broken-0.2+broken.tar.gz similarity index 100% rename from tests/data/packages/broken-0.2broken.tar.gz rename to tests/data/packages/broken-0.2+broken.tar.gz diff --git a/tests/data/packages/invalid_version-1.0-py3-none-any.whl b/tests/data/packages/invalid_version-1.0-py3-none-any.whl new file mode 100644 index 00000000000..eeee9527792 Binary files /dev/null and b/tests/data/packages/invalid_version-1.0-py3-none-any.whl differ diff --git a/tests/data/packages/invalid_version-2010i-py3-none-any.whl b/tests/data/packages/invalid_version-2010i-py3-none-any.whl new file mode 100644 index 00000000000..1048e6ffc38 Binary files /dev/null and b/tests/data/packages/invalid_version-2010i-py3-none-any.whl differ diff --git a/tests/data/packages/pip-test-package-0.1.1.tar.gz b/tests/data/packages/pip_test_package-0.1.1.tar.gz similarity index 100% rename from tests/data/packages/pip-test-package-0.1.1.tar.gz rename to tests/data/packages/pip_test_package-0.1.1.tar.gz diff --git a/tests/data/packages/pip-test-package-0.1.tar.gz b/tests/data/packages/pip_test_package-0.1.tar.gz similarity index 100% rename from tests/data/packages/pip-test-package-0.1.tar.gz rename to tests/data/packages/pip_test_package-0.1.tar.gz diff --git a/tests/data/packages/require_invalid_version-0.1-py3-none-any.whl b/tests/data/packages/require_invalid_version-0.1-py3-none-any.whl new file mode 100644 index 00000000000..976b80b89a0 Binary files /dev/null and b/tests/data/packages/require_invalid_version-0.1-py3-none-any.whl differ diff --git a/tests/data/packages/require_invalid_version-1.0-py3-none-any.whl b/tests/data/packages/require_invalid_version-1.0-py3-none-any.whl new file mode 100644 index 00000000000..3dd628a16d7 Binary files /dev/null and b/tests/data/packages/require_invalid_version-1.0-py3-none-any.whl differ diff --git a/tests/data/packages/require_invalid_version-1.0.tar.gz b/tests/data/packages/require_invalid_version-1.0.tar.gz new file mode 100644 index 00000000000..9db066988ff Binary files /dev/null and b/tests/data/packages/require_invalid_version-1.0.tar.gz differ diff --git a/tests/functional/test_check.py b/tests/functional/test_check.py index 79b6df39c19..46ecdcc6457 100644 --- a/tests/functional/test_check.py +++ b/tests/functional/test_check.py @@ -272,7 +272,7 @@ def test_basic_check_broken_metadata(script: PipTestEnvironment) -> None: result = script.pip("check", expect_error=True) - assert "Error parsing requirements" in result.stderr + assert "Error parsing dependencies of" in result.stderr assert result.returncode == 1 diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index b65212f929c..eaea12a163c 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -391,7 +391,7 @@ def 
test_install_editable_uninstalls_existing( https://github.com/pypa/pip/issues/1548 https://github.com/pypa/pip/pull/1552 """ - to_install = data.packages.joinpath("pip-test-package-0.1.tar.gz") + to_install = data.packages.joinpath("pip_test_package-0.1.tar.gz") result = script.pip_install_local(to_install) assert "Successfully installed pip-test-package" in result.stdout result.assert_installed("piptestpackage", editable=False) @@ -2253,14 +2253,14 @@ def test_yanked_version_missing_from_availble_versions_error_message( """ result = script.pip( "install", - "simple==", + "simple==0.1", "--index-url", data.index_url("yanked"), expect_error=True, ) # the yanked version (3.0) is filtered out from the output: expected_warning = ( - "Could not find a version that satisfies the requirement simple== " + "Could not find a version that satisfies the requirement simple==0.1 " "(from versions: 1.0, 2.0)" ) assert expected_warning in result.stderr, str(result) diff --git a/tests/functional/test_install_extras.py b/tests/functional/test_install_extras.py index 1dd67be0a0c..4dd27bb9df3 100644 --- a/tests/functional/test_install_extras.py +++ b/tests/functional/test_install_extras.py @@ -152,24 +152,14 @@ def test_install_fails_if_extra_at_end( script.scratch_path / "requirements.txt", expect_error=True, ) - assert "Extras after version" in result.stderr + assert "Invalid requirement: 'requires_simple_extra>=0.1[extra]'" in result.stderr @pytest.mark.parametrize( "specified_extra, requested_extra", [ ("Hop_hOp-hoP", "Hop_hOp-hoP"), - pytest.param( - "Hop_hOp-hoP", - "hop-hop-hop", - marks=pytest.mark.xfail( - reason=( - "matching a normalized extra request against an" - "unnormalized extra in metadata requires PEP 685 support " - "in packaging (see pypa/pip#11445)." - ), - ), - ), + ("Hop_hOp-hoP", "hop-hop-hop"), ("hop-hop-hop", "Hop_hOp-hoP"), ], ) diff --git a/tests/functional/test_install_upgrade.py b/tests/functional/test_install_upgrade.py index 6556fcdf599..00036c1ef05 100644 --- a/tests/functional/test_install_upgrade.py +++ b/tests/functional/test_install_upgrade.py @@ -295,7 +295,7 @@ def test_uninstall_rollback(script: PipTestEnvironment, data: TestData) -> None: "-f", data.find_links, "--no-index", - "broken===0.2broken", + "broken===0.2+broken", expect_error=True, ) assert result2.returncode == 1, str(result2) diff --git a/tests/functional/test_invalid_versions_and_specifiers.py b/tests/functional/test_invalid_versions_and_specifiers.py new file mode 100644 index 00000000000..21ce02085fc --- /dev/null +++ b/tests/functional/test_invalid_versions_and_specifiers.py @@ -0,0 +1,139 @@ +import zipfile + +import pytest + +from pip._internal.metadata import select_backend +from tests.lib import PipTestEnvironment, TestData + + +def test_install_from_index_with_invalid_version( + script: PipTestEnvironment, data: TestData +) -> None: + """ + Test that pip does not crash when installing a package from an index with + an invalid version. It ignores invalid versions. + """ + index_url = data.index_url("invalid-version") + result = script.pip( + "install", "--dry-run", "--index-url", index_url, "invalid-version" + ) + assert "Would install invalid-version-1.0" in result.stdout + + +def test_install_from_index_with_invalid_specifier( + script: PipTestEnvironment, data: TestData +) -> None: + """ + Test that pip does not crash when installing a package with an invalid + version specifier in its dependencies. 
+ """ + index_url = data.index_url("require-invalid-version") + result = script.pip( + "install", + "--dry-run", + "--index-url", + index_url, + "require-invalid-version", + allow_stderr_warning=True, + ) + assert ( + "WARNING: Ignoring version 1.0 of require-invalid-version " + "since it has invalid metadata" + ) in result.stderr + assert "Would install require-invalid-version-0.1" in result.stdout + + +def _install_invalid_version(script: PipTestEnvironment, data: TestData) -> None: + """ + Install a package with an invalid version. + """ + with zipfile.ZipFile( + data.packages.joinpath("invalid_version-2010i-py3-none-any.whl") + ) as zf: + zf.extractall(script.site_packages_path) + + +def _install_require_invalid_version( + script: PipTestEnvironment, data: TestData +) -> None: + """ + Install a package with an invalid version. + """ + with zipfile.ZipFile( + data.packages.joinpath("require_invalid_version-1.0-py3-none-any.whl") + ) as zf: + zf.extractall(script.site_packages_path) + + +def test_uninstall_invalid_version(script: PipTestEnvironment, data: TestData) -> None: + """ + Test that it is possible to uninstall a distribution with an invalid version. + """ + _install_invalid_version(script, data) + script.pip("uninstall", "-y", "invalid-version") + + +@pytest.mark.xfail +def test_upgrade_invalid_version(script: PipTestEnvironment, data: TestData) -> None: + """ + Test that it is possible to upgrade a distribution with an invalid version. + """ + _install_invalid_version(script, data) + index_url = data.index_url("invalid-version") + script.pip("install", "--index-url", index_url, "invalid-version") + + +@pytest.mark.xfail +def test_upgrade_require_invalid_version( + script: PipTestEnvironment, data: TestData +) -> None: + """ + Test that it is possible to upgrade a distribution with an invalid metadata. + """ + _install_require_invalid_version(script, data) + index_url = data.index_url("require-invalid-version") + script.pip("install", "--index-url", index_url, "require-invalid-version") + + +def test_list_invalid_version(script: PipTestEnvironment, data: TestData) -> None: + """ + Test that pip can list an environment containing a package with a legacy version. + """ + _install_invalid_version(script, data) + script.pip("list") + + +def test_freeze_invalid_version(script: PipTestEnvironment, data: TestData) -> None: + """ + Test that pip can freeze an environment containing a package with a legacy version. + """ + _install_invalid_version(script, data) + result = script.pip("freeze") + assert "invalid-version===2010i\n" in result.stdout + + +def test_show_invalid_version(script: PipTestEnvironment, data: TestData) -> None: + """ + Test that pip can show an installed distribution with a legacy version. + """ + _install_invalid_version(script, data) + result = script.pip("show", "invalid-version") + assert "Name: invalid-version\nVersion: 2010i\n" in result.stdout + + +def test_show_require_invalid_version( + script: PipTestEnvironment, data: TestData +) -> None: + """ + Test that pip can show an installed distribution with a legacy specifier. 
+ """ + _install_require_invalid_version(script, data) + result = script.pip("show", "require-invalid-version") + assert "Name: require-invalid-version\nVersion: 1.0\n" in result.stdout + assert "Requires: invalid-version ==2010i\n" in result.stdout + if select_backend().NAME == "importlib": + assert "Required-by: #N/A\n" in result.stdout + elif select_backend().NAME == "pkg_resources": + assert "Required-by: \n" in result.stdout + else: + assert False, "Unknown metadata backend" diff --git a/tests/lib/wheel.py b/tests/lib/wheel.py index e63f44e1cad..342abbacaad 100644 --- a/tests/lib/wheel.py +++ b/tests/lib/wheel.py @@ -103,7 +103,7 @@ def make_metadata_file( if body is not _default: message.set_payload(body) - return File(path, message_from_dict(metadata).as_bytes()) + return File(path, message.as_bytes()) def make_wheel_metadata_file( diff --git a/tests/unit/metadata/test_metadata_pkg_resources.py b/tests/unit/metadata/test_metadata_pkg_resources.py index ab1a56107f4..6044c14e4ca 100644 --- a/tests/unit/metadata/test_metadata_pkg_resources.py +++ b/tests/unit/metadata/test_metadata_pkg_resources.py @@ -4,7 +4,9 @@ from unittest import mock import pytest +from pip._vendor.packaging.requirements import Requirement from pip._vendor.packaging.specifiers import SpecifierSet +from pip._vendor.packaging.utils import canonicalize_name from pip._vendor.packaging.version import parse as parse_version from pip._internal.exceptions import UnsupportedWheel @@ -106,10 +108,10 @@ def test_wheel_metadata_works() -> None: assert name == dist.canonical_name == dist.raw_name assert parse_version(version) == dist.version - assert set(extras) == set(dist.iter_provided_extras()) + assert {canonicalize_name(e) for e in extras} == set(dist.iter_provided_extras()) assert [require_a] == [str(r) for r in dist.iter_dependencies()] - assert [require_a, require_b] == [ - str(r) for r in dist.iter_dependencies(["also_b"]) + assert [Requirement(require_a), Requirement(require_b)] == [ + Requirement(str(r)) for r in dist.iter_dependencies(["also_b"]) ] assert metadata.as_string() == dist.metadata.as_string() assert SpecifierSet(requires_python) == dist.requires_python