Skip to content

Commit

Permalink
preparing for 1.5.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Ben Avrahami committed Apr 8, 2024
1 parent 6bb7bf4 commit 02d291f
Show file tree
Hide file tree
Showing 11 changed files with 259 additions and 104 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
unittest:
strategy:
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] # format: 3.7, 3.8, 3.9
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12"] # format: 3.7, 3.8, 3.9
platform: [ubuntu-latest, macos-latest, windows-latest]
fail-fast: false
runs-on: ${{ matrix.platform }}
Expand Down
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
# envolved Changelog
## 1.5.0
### Removed
* `envolved` no longer supports python 3.7
### Added
* `FindIterCollectionParser`
### Fixed
* `CollectionParser`'s `opener` and `closer` arguments now correctly handle matches that would be split by the delimiter
* `CollectionParser`'s `closer` argument now correctly handles overlapping matches
* `CollectionParser`'s `closer` argument is now faster when using non-regex matches
* `CollectionParser.pair_wise_delimited` will now be more memory efficient when using a mapping `value_type`
## 1.4.0
### Deprecated
* this is the last release to support python 3.7
Expand Down
35 changes: 33 additions & 2 deletions docs/string_parsing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,10 @@ Utility Parsers
:param delimiter: The delimiter string or pattern to split the string on.
:param inner_parser: The parser to use to parse the elements of the collection. Note this parser is treated the
same as an EnvVar type, so :ref:`string_parsing:Special parsers` apply.
:param output_type: The type to use to aggregate the parsed items to a collection defaults to list.
:param output_type: The type to use to aggregate the parsed items to a collection. Defaults to list.
:param opener: If set, specifies a string or pattern that should be at the beginning of the delimited string.
:param closer: If set, specifies a string or pattern that should be at the end of the delimited string.
:param closer: If set, specifies a string or pattern that should be at the end of the delimited string. Note that providing
a pattern will slow down the parsing process.
:param strip: Whether or not to strip whitespaces from the beginning and end of each item.

.. code-block::
Expand Down Expand Up @@ -140,6 +141,36 @@ Utility Parsers
assert server_params_ev.get() == {"host": "localhost", "port": 8080, "is_ssl": False}
.. class:: FindIterCollectionParser(element_pattern: typing.Pattern, element_func: collections.abc.Callable[[re.Match], E], \
output_type: collections.abc.Callable[[collections.abc.Iterator[E]], G] = list, \
opener: str | typing.Pattern = '', closer: str | typing.Pattern = '')

A parser to translate a string to a collection of values by splitting the string into contiguous elements that match
a regex pattern. This parser is useful for parsing strings that have a repeating, complex structure, or in cases where
a :class:`naive split <CollectionParser>` would split the string incorrectly.

:param element_pattern: A regex pattern to find the elements in the string.
:param element_func: A function that takes a regex match object and returns an element.
:param output_type: The type to use to aggregate the parsed items to a collection. Defaults to list.
:param opener: If set, specifies a string or pattern that should be at the beginning of the string.
:param closer: If set, specifies a string or pattern that should be at the end of the string. Note that providing
a pattern will slow down the parsing process.

.. code-block::
:caption: Using FindIterCollectionParser to parse a string of comma-separated groups of numbers.
def parse_group(match: re.Match) -> set[int]:
return {int(x) for x in match.group(1).split(',')}
groups_ev = env_var("GROUPS", type=FindIterCollectionParser(
re.compile(r"{([,\d]+)},?"),
parse_group
))
os.environ["GROUPS"] = "{1,2,3},{4,5,6},{7,8,9}"
assert groups_ev.get() == [{1, 2, 3}, {4, 5, 6}, {7, 8, 9}]
.. class:: MatchParser(cases: collections.abc.Iterable[tuple[typing.Pattern[str] | str, T]] | \
collections.abc.Mapping[str, T] | type[enum.Enum], fallback: T = ...)
Expand Down
2 changes: 1 addition & 1 deletion envolved/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.4.0"
__version__ = "1.5.0"
40 changes: 1 addition & 39 deletions envolved/envparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,50 +120,12 @@ def get(self, case_sensitive: bool, key: str) -> str:
return ret


class NonAuditingEnvParser(ReloadingEnvParser):
    """
    Environment parser that resolves case-insensitive keys through ``self.environ_case_insensitive``, and calls
    ``self.reload()`` (at most once per lookup) whenever that mapping disagrees with the live environment.
    """

    def get(self, case_sensitive: bool, key: str) -> str:
        """
        Retrieve the value of an environment variable.

        :param case_sensitive: If true, the key is looked up as-is via ``getenv_unsafe``.
        :param key: The name of the environment variable.
        :return: The value of the matching environment variable.
        :raises CaseInsensitiveAmbiguityError: If, after a reload, several candidates match the key
            case-insensitively and none of them is an exact match.
        """
        if case_sensitive:
            return getenv_unsafe(key)

        folded_key = key.lower()

        def lookup(retry_allowed: bool) -> str:
            if retry_allowed and folded_key not in self.environ_case_insensitive:
                # nothing is known under this name yet, the mapping might be stale
                return reload_then_lookup()

            names = self.environ_case_insensitive[folded_key]
            if key in names:
                chosen_name = key
            elif retry_allowed and has_env(key):
                # the exact key exists in the environment but the mapping does not know about it
                return reload_then_lookup()
            elif len(names) == 1:
                (chosen_name,) = names
            elif retry_allowed:
                # ambiguous for now, a reload might resolve the ambiguity
                return reload_then_lookup()
            else:
                raise CaseInsensitiveAmbiguityError(names)

            value = getenv(chosen_name)
            if value is None:
                # the mapping pointed us at a variable that no longer exists
                assert retry_allowed
                return reload_then_lookup()
            return value

        def reload_then_lookup() -> str:
            # second (and final) attempt, performed against freshly reloaded data
            self.reload()
            return lookup(retry_allowed=False)

        return lookup(retry_allowed=True)


EnvParser: Type[BaseEnvParser]
if name == "nt":
# in windows, all env vars are uppercase
EnvParser = CaseInsensitiveEnvParser
elif sys.version_info >= (3, 8): # adding audit hooks is only supported in python 3.8+
EnvParser = AuditingEnvParser
else:
EnvParser = NonAuditingEnvParser
EnvParser = AuditingEnvParser


env_parser = EnvParser()
Expand Down
152 changes: 99 additions & 53 deletions envolved/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import re
from enum import Enum, auto
from functools import lru_cache
from itertools import chain
from sys import version_info
from typing import (
Expand Down Expand Up @@ -138,6 +137,68 @@ def _duplicate_avoiding_dict(pairs: Iterator[Tuple[K, V]]) -> Dict[K, V]:
return ret


def strip_opener_idx(x: str, opener: Pattern[str]) -> int:
    """
    Locate the index at which the content of ``x`` begins, right after the opener.

    :param x: The string to inspect.
    :param opener: The pattern that must match at the very beginning of ``x``.
    :return: The index just past the end of the opener match.
    :raises ValueError: If ``opener`` does not match at position 0.
    """
    found = opener.match(x)
    if found is not None:
        return found.end()
    raise ValueError("position 0, expected opener")


def strip_closer_idx(x: str, closer: Needle, pos: int) -> int:
    """
    Locate the index at which the closer begins, where the closer must end exactly at the end of ``x``.

    :param x: The string to inspect.
    :param closer: A plain string or a compiled pattern that ``x`` must end with.
    :param pos: The first index the closer is allowed to occupy (typically, the end of the opener).
    :return: The index at which the closer starts.
    :raises ValueError: If ``x`` does not end with the closer, or if the closer would begin before ``pos``.
    """
    total = len(x)

    if isinstance(closer, str):
        # plain strings can be checked directly against the tail of the string
        fits_after_pos = pos + len(closer) <= total
        if fits_after_pos and x.endswith(closer):
            return total - len(closer)
        raise ValueError("expected string to end in closer")

    assert isinstance(closer, Pattern)
    # the standard re module cannot match in reverse, so we walk all the matches and keep the last one
    candidate = None
    for candidate in closer.finditer(x, pos):  # noqa: B007
        pass

    # finditer only reports non-overlapping matches. If the last one stops short of the end of the string,
    # a match that overlaps it can only begin after its start, so we keep searching from there until a match
    # reaches the end of the string or no matches remain.
    while candidate is not None and candidate.end() != total:
        candidate = closer.search(x, candidate.start() + 1)

    if candidate is None:
        raise ValueError("expected string to end in closer")
    return candidate.start()


def strip_opener_and_closer(x: str, opener: Pattern[str], closer: Needle) -> str:
    """
    Remove the opener from the start of ``x`` and the closer from its end.

    :param x: The string to strip.
    :param opener: The pattern that must match at the beginning of ``x``.
    :param closer: The string or pattern that ``x`` must end with, located after the opener.
    :return: The part of ``x`` between the opener and the closer. If both are empty matches, ``x`` itself is
        returned without slicing.
    :raises ValueError: If the opener or the closer are not found where expected.
    """
    content_start = strip_opener_idx(x, opener)
    content_end = strip_closer_idx(x, closer, content_start)

    nothing_to_strip = content_start == 0 and content_end == len(x)
    if nothing_to_strip:
        return x
    return x[content_start:content_end]


def value_parser_func(value_type: Union[ParserInput[V], Mapping[K, ParserInput[V]]]) -> Callable[[K], Parser[V]]:
    """
    Build a function that selects the value parser to use for a given key.

    :param value_type: Either a single parser input shared by all keys, or a mapping from each key to its own
        parser input.
    :return: A callable that accepts a key and returns the parser for the value of that key.
    """
    if not isinstance(value_type, Mapping):
        # a single parser is shared by every key, we only need to build it once
        shared_parser = parser(value_type)

        def get_shared_parser(key: K) -> Parser[V]:
            return shared_parser

        return get_shared_parser

    # the parsers of all the keys known up-front are built eagerly
    prepared_parsers = {known_key: parser(raw) for known_key, raw in value_type.items()}

    def get_parser_by_key(key: K) -> Parser[V]:
        try:
            prepared = prepared_parsers[key]
        except KeyError:
            # the mapping may still be able to supply this key (e.g. if it has a default value),
            # so we defer to it directly
            return parser(value_type[key])
        return prepared

    return get_parser_by_key


class CollectionParser(Generic[G, E]):
"""
A parser that splits a string by a delimiter, and parses each part individually.
Expand All @@ -149,45 +210,20 @@ def __init__(
inner_parser: ParserInput[E],
output_type: Callable[[Iterator[E]], G] = list, # type: ignore[assignment]
opener: Needle = empty_pattern,
closer: Needle = empty_pattern,
closer: Needle = "",
*,
strip: bool = True,
):
"""
:param delimiter: The delimiter to split by.
:param inner_parser: The inner parser to apply to each element.
:param output_type: The aggregator function of all the parsed elements.
:param opener: Optional opener that must be present at the start of the string.
:param closer: Optional closer that must be present at the end of the string.
"""
self.delimiter_pattern = needle_to_pattern(delimiter)
self.inner_parser = parser(inner_parser)
self.output_type = output_type
self.opener_pattern = needle_to_pattern(opener)
self.closer_pattern = needle_to_pattern(closer)
self.closer = closer
self.strip = strip

def __call__(self, x: str) -> G:
opener_match = self.opener_pattern.match(x)
if not opener_match:
raise ValueError("position 0, expected opener")
x = x[opener_match.end() :]
raw_elements = self.delimiter_pattern.split(x)
closer_matches = self.closer_pattern.finditer(raw_elements[-1])

closer_match = None
for closer_match in closer_matches: # noqa: B007
pass
if not closer_match:
raise ValueError("expected string to end in closer")
elif closer_match.end() != len(raw_elements[-1]):
raise ValueError(
"expected closer to match end of string, got unexpected suffix: "
+ raw_elements[-1][closer_match.end() :]
)

raw_elements[-1] = raw_elements[-1][: closer_match.start()]
raw_items = iter(raw_elements)
x = strip_opener_and_closer(x, self.opener_pattern, self.closer)
raw_items = iter(self.delimiter_pattern.split(x))
if self.strip:
raw_items = (r.strip() for r in raw_items)
elements = (self.inner_parser(r) for r in raw_items)
Expand All @@ -201,36 +237,14 @@ def pair_wise_delimited(
key_type: ParserInput[K],
value_type: Union[ParserInput[V], Mapping[K, ParserInput[V]]],
output_type: Callable[[Iterator[Tuple[K, V]]], G] = _duplicate_avoiding_dict, # type: ignore[assignment]
*,
key_first: bool = True,
strip_keys: bool = True,
strip_values: bool = True,
**kwargs: Any,
) -> Parser[G]:
"""
Create a collectionParser that aggregates to key-value pairs.
:param pair_delimiter: The separator between different key-value pairs.
:param key_value_delimiter: The separator between each key and value.
:param key_type: The parser for key elements.
:param value_type: The parser for value elements. Can also be a mapping, parsing each key under a different
parser.
:param output_type: The tuple aggregator function. Defaults to a duplicate-checking dict.
:param key_first: If set to false, will evaluate the part behind the key-value separator as a value.
:param kwargs: forwarded to `CollectionParser.__init__`
"""
key_value_delimiter = needle_to_pattern(key_value_delimiter)
key_parser = parser(key_type)
get_value_parser: Callable[[K], Parser]
if isinstance(value_type, Mapping):

@lru_cache(None)
def get_value_parser(key: K) -> Parser[V]:
return parser(value_type[key])
else:
_value_parser = parser(value_type)

def get_value_parser(key: K) -> Parser[V]:
return _value_parser
get_value_parser = value_parser_func(value_type)

def combined_parser(s: str) -> Tuple[K, V]:
split = key_value_delimiter.split(s, maxsplit=2)
Expand All @@ -250,6 +264,38 @@ def combined_parser(s: str) -> Tuple[K, V]:
return cls(pair_delimiter, combined_parser, output_type, **kwargs) # type: ignore[arg-type]


def find_iter_contingient(x: str, pattern: Pattern[str]) -> Iterator[re.Match[str]]:
    """
    Iterate over back-to-back matches of a pattern that together cover an entire string.

    Each match must begin exactly where the previous one ended (the first one begins at position 0), and matching
    continues until the end of the string is reached. An empty string yields no matches.

    :param x: The string to break into matches.
    :param pattern: The pattern each element of the string must match.
    :return: An iterator of the consecutive match objects.
    :raises ValueError: If the pattern does not match at the position where the previous match ended, or if it
        matches an empty string there (as no progress could ever be made past that position).
    """
    start_idx = 0
    end_idx = len(x)
    while start_idx < end_idx:
        match = pattern.match(x, start_idx)
        if match is None:
            raise ValueError(f"could not match pattern {pattern} at position {start_idx}")
        if match.end() == start_idx:
            # a zero-width match does not consume any of the string, without this guard we would
            # keep yielding the same empty match forever
            raise ValueError(f"pattern {pattern} matched an empty string at position {start_idx}")
        start_idx = match.end()
        yield match


class FindIterCollectionParser(Generic[G, E]):
    """
    A parser that reads a string as a run of back-to-back regex matches, and converts each match to an element.
    """

    def __init__(
        self,
        element_pattern: Pattern[str],
        element_func: Callable[[re.Match[str]], E],
        output_type: Callable[[Iterator[E]], G] = list,  # type: ignore[assignment]
        opener: Needle = empty_pattern,
        closer: Needle = "",
    ):
        """
        :param element_pattern: The pattern that each element in the string must match.
        :param element_func: A function to convert each match object to an element.
        :param output_type: The aggregator function of all the parsed elements.
        :param opener: Optional opener that must be present at the start of the string.
        :param closer: Optional closer that must be present at the end of the string.
        """
        self.opener_pattern = needle_to_pattern(opener)
        # the closer is kept as provided (string or pattern), it is only interpreted when parsing
        self.closer = closer
        self.prefix_pattern = element_pattern
        self.element_func = element_func
        self.output_type = output_type

    def __call__(self, x: str) -> G:
        """
        Parse a string to a collection.

        :param x: The string to parse.
        :return: The result of ``output_type`` over the lazily converted elements.
        """
        content = strip_opener_and_closer(x, self.opener_pattern, self.closer)
        element_matches = find_iter_contingient(content, self.prefix_pattern)
        return self.output_type(map(self.element_func, element_matches))


class NoFallback(Enum):
no_fallback = auto()

Expand Down
15 changes: 8 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "envolved"
version = "1.4.0"
version = "1.5.0"
description = ""
authors = ["ben avrahami <[email protected]>"]
license = "MIT"
Expand All @@ -12,11 +12,8 @@ packages = [
]

[tool.poetry.dependencies]
python = "^3.7"
typing-extensions = [
{version="<4.8.0", python=">=3.7, <3.8"},
{version="*", python=">=3.8"},
]
python = "^3.8"
typing-extensions = "*"

[tool.poetry.group.dev.dependencies]
pytest = "*"
Expand All @@ -39,7 +36,7 @@ build-backend = "poetry.masonry.api"


[tool.ruff]
target-version = "py37"
target-version = "py38"
line-length = 120
output-format = "full"
[tool.ruff.lint]
Expand Down Expand Up @@ -108,3 +105,7 @@ keep-runtime-typing = true
"PTH", # use pathlib
"PERF", # performance anti-patterns
]

"type_checking/**" = [
"INP001", # implicit namespace packages
]
1 change: 1 addition & 0 deletions scripts/test_type_hinting.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python -m mypy --show-error-codes --check-untyped-defs type_checking
Loading

0 comments on commit 02d291f

Please sign in to comment.