From a6570f60136d47d8bc27c2c54e297d52af69f936 Mon Sep 17 00:00:00 2001 From: Brent Yi Date: Tue, 5 Oct 2021 14:33:47 -0700 Subject: [PATCH] Tests, infra, minor bug fixes --- .flake8 | 6 ++ .github/workflows/build.yml | 28 ++++++++ .github/workflows/lint.yml | 17 +++++ .github/workflows/publish.yml | 31 ++++++++ .gitignore | 2 + README.md | 25 ++++--- dcargs/_arguments.py | 22 +++--- dcargs/_docstrings.py | 5 +- dcargs/_parse.py | 27 +++---- dcargs/_parsers.py | 23 ++++-- example.py | 42 ----------- example_more_elaborate.py | 47 ------------- setup.py | 9 ++- tests/test_dcargs.py | 129 ++++++++++++++++++++++++++++++++++ tests/test_forward_ref.py | 54 ++++++++++++++ 15 files changed, 333 insertions(+), 134 deletions(-) create mode 100644 .flake8 create mode 100644 .github/workflows/build.yml create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/publish.yml delete mode 100644 example.py delete mode 100644 example_more_elaborate.py create mode 100644 tests/test_dcargs.py create mode 100644 tests/test_forward_ref.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..a0bebef2 --- /dev/null +++ b/.flake8 @@ -0,0 +1,6 @@ +[flake8] +# E203: whitespace before : +# E501: line too long ( characters) +# W503: line break before binary operator +; ignore = E203,E501,D100,D101,D102,D103,W503 +ignore = E203,E501,W503 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..f77bc0b0 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,28 @@ +name: build + +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.7", "3.8", "3.9"] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[testing]" + - name: Test with pytest + run: | + pytest diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..5df6615b --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,17 @@ +name: lint + +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + black-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - name: Black Code Formatter + uses: lgeiger/black-action@master + with: + args: ". --check" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 00000000..d5f3859d --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,31 @@ +# This workflows will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Upload Python Package + +on: + release: + types: [created] + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* diff --git a/.gitignore b/.gitignore index be356216..d2104f16 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ __pycache__ .hypothesis .ipynb_checkpoints .cache +build/ +dist/ diff --git a/README.md b/README.md index 1f4971a2..c905ed0e 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,22 @@ # dcargs -`dcargs` is argparse + datclasses, with the goal of generating portable, -reusable, and strongly typed CLI interfaces. +`dcargs` is a tool for generating portable, reusable, and strongly typed CLI +interfaces from dataclass definitions. -We expose only one function, which takes a dataclass type and instantiates it -via CLI flags: +We expose one function, `parse(Type[T]) -> T`, which takes a dataclass type and +instantiates it via an argparse-style CLI interface: ```python -# Importable via dcargs.parse -def parse(cls: Type[DataclassType], description: str = "") -> DataclassType: - ... +import dataclasses + +import dcargs + +@dataclasses.dataclass +class Args: + field1: str + field2: int + +args = dcargs.parse(Args) ``` The parse function supports dataclasses containing: @@ -18,8 +25,8 @@ The parse function supports dataclasses containing: - [x] Boolean flags - [x] Enums (via `enum.Enum`) - [x] Optional types -- [x] Literal types (by populating `choices`) -- [ ] Sequence and list types (by populating `nargs`) +- [x] Literal types (populates `choices`) +- [ ] Sequence and list types - [x] Forward references (including in unions) - [x] Automatic helptext generation - [x] Nested dataclasses diff --git a/dcargs/_arguments.py b/dcargs/_arguments.py index 686666ea..409e9833 100644 --- a/dcargs/_arguments.py +++ b/dcargs/_arguments.py @@ -1,17 +1,9 @@ import dataclasses import enum -from typing import ( - TYPE_CHECKING, - Any, - Callable, - List, - Literal, - Optional, - Set, - Type, - Union, - get_type_hints, -) +from typing import (TYPE_CHECKING, Any, Callable, List, Optional, Set, Type, + Union) + +from typing_extensions import Literal # Python 3.7 compat from . import _docstrings @@ -73,6 +65,8 @@ def _bool_flags(arg: ArgumentDefinition) -> None: """For booleans, we use a `store_true` action.""" if arg.type is not bool: return + + # TODO: what if the default value of the field is set to true by the user? arg.action = "store_true" arg.type = None arg.default = False @@ -100,7 +94,7 @@ def _handle_optionals(arg: ArgumentDefinition) -> None: not required.""" field = arg.field if hasattr(field.type, "__origin__") and field.type.__origin__ is Union: - options = set(get_type_hints(arg.parent_class)[field.name].__args__) + options = set(field.type.__args__) assert ( len(options) == 2 and type(None) in options ), "Union must be either over dataclasses (for subparsers) or Optional" @@ -125,7 +119,7 @@ def _choices_from_literals(arg: ArgumentDefinition) -> None: def _enums_as_strings(arg: ArgumentDefinition) -> None: """For enums, use string representations.""" - if arg.type is not None and issubclass(arg.type, enum.Enum): + if type(arg.type) is type and issubclass(arg.type, enum.Enum): if arg.choices is None: arg.choices = set(x.name for x in arg.type) else: diff --git a/dcargs/_docstrings.py b/dcargs/_docstrings.py index 474b79b8..deb77766 100644 --- a/dcargs/_docstrings.py +++ b/dcargs/_docstrings.py @@ -29,8 +29,7 @@ def make(cls) -> "_Tokenization": if toktype in (tokenize.NEWLINE, tokenize.NL): line_number += 1 tokens_from_line[line_number] = [] - elif toktype != tokenize.INDENT: - # Add everything else except for whitespace + elif toktype is not tokenize.INDENT: token = _Token(token_type=toktype, token=tok, line_number=line_number) tokens.append(token) tokens_from_line[line_number].append(token) @@ -82,7 +81,7 @@ def get_field_docstring(cls: Type, field_name: str) -> str: return comment[1:].strip() # Check for comment on the line before the field - # TODO: this will likely results in unintentional docstrings? + # TODO: this may result in unintentional helptext? comment_index = field_index comments: List[str] = [] while True: diff --git a/dcargs/_parse.py b/dcargs/_parse.py index c24f598c..07ba870d 100644 --- a/dcargs/_parse.py +++ b/dcargs/_parse.py @@ -1,7 +1,7 @@ import argparse import dataclasses import enum -from typing import Any, ClassVar, Dict, Generic, Type, TypeVar, Union, get_type_hints +from typing import Any, Dict, Optional, Sequence, Type, TypeVar, Union from . import _strings from ._parsers import Parser, ParserDefinition @@ -9,23 +9,27 @@ DataclassType = TypeVar("DataclassType") -def parse(cls: Type[DataclassType], description: str = "") -> DataclassType: +def parse( + cls: Type[DataclassType], + description: str = "", + args: Optional[Sequence[str]] = None, +) -> DataclassType: """Populate a dataclass via CLI args.""" assert dataclasses.is_dataclass(cls) - parser_definition = ParserDefinition.from_dataclass(cls) + parser_definition = ParserDefinition.from_dataclass(cls, parent_dataclasses=set()) root_parser = argparse.ArgumentParser( description=_strings.dedent(description), formatter_class=argparse.RawTextHelpFormatter, ) parser_definition.apply(Parser.make(root_parser)) - namespace = root_parser.parse_args() + namespace = root_parser.parse_args(args) - return construct_dataclass(cls, vars(namespace)) + return _construct_dataclass(cls, vars(namespace)) -def construct_dataclass( +def _construct_dataclass( cls: Type[DataclassType], values: Dict[str, Any] ) -> DataclassType: """Construct a dataclass object from a dictionary of values from argparse.""" @@ -34,6 +38,7 @@ def construct_dataclass( fields = dataclasses.fields(cls) kwargs: Dict[str, Any] = {} + for field in fields: if not field.init: continue @@ -47,7 +52,7 @@ def construct_dataclass( # Nested dataclasses elif dataclasses.is_dataclass(field.type): arg_prefix = field.name + _strings.NESTED_DATACLASS_DELIMETER - value = construct_dataclass( + value = _construct_dataclass( field.type, values={ k[len(arg_prefix) :]: v @@ -60,20 +65,18 @@ def construct_dataclass( elif ( hasattr(field.type, "__origin__") and field.type.__origin__ is Union - and all( - map(dataclasses.is_dataclass, get_type_hints(cls)[field.name].__args__) - ) + and all(map(dataclasses.is_dataclass, field.type.__args__)) ): subparser_dest = _strings.SUBPARSER_DEST_FMT.format(name=field.name) assert subparser_dest in values.keys() - options = get_type_hints(cls)[field.name].__args__ + options = field.type.__args__ chosen_cls = None for option in options: if option.__name__ == values[subparser_dest]: chosen_cls = option break assert chosen_cls is not None - value = construct_dataclass(chosen_cls, values) + value = _construct_dataclass(chosen_cls, values) # General case else: diff --git a/dcargs/_parsers.py b/dcargs/_parsers.py index cbae749e..0ce067c7 100644 --- a/dcargs/_parsers.py +++ b/dcargs/_parsers.py @@ -1,6 +1,6 @@ import argparse import dataclasses -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, Union, get_type_hints +from typing import Any, Dict, List, Optional, Set, Type, Union, get_type_hints from . import _strings from ._arguments import ArgumentDefinition @@ -51,22 +51,33 @@ def apply(self, parsers: Parser) -> None: subparser_def.apply(Parser.make(subparser)) @staticmethod - def from_dataclass(cls: Type[Any]) -> "ParserDefinition": + def from_dataclass( + cls: Type[Any], parent_dataclasses: Set[Type] = set() + ) -> "ParserDefinition": """Create a parser definition from a dataclass.""" assert dataclasses.is_dataclass(cls) + assert ( + cls not in parent_dataclasses + ), f"Found a cyclic dataclass dependency with type {cls}" args = [] subparsers = None + annotations = get_type_hints(cls) for field in dataclasses.fields(cls): if not field.init: continue + # Resolve forward references + field.type = annotations[field.name] + vanilla_field: bool = True # Add arguments for nested dataclasses if dataclasses.is_dataclass(field.type): - child_definition = ParserDefinition.from_dataclass(field.type) + child_definition = ParserDefinition.from_dataclass( + field.type, parent_dataclasses | {cls} + ) child_args = child_definition.args for arg in child_args: arg.name = ( @@ -82,7 +93,7 @@ def from_dataclass(cls: Type[Any]) -> "ParserDefinition": # Unions of dataclasses should create subparsers if hasattr(field.type, "__origin__") and field.type.__origin__ is Union: - options = get_type_hints(cls)[field.name].__args__ + options = field.type.__args__ if all(map(dataclasses.is_dataclass, options)): assert ( subparsers is None @@ -91,7 +102,9 @@ def from_dataclass(cls: Type[Any]) -> "ParserDefinition": subparsers = SubparsersDefinition( name=field.name, parsers={ - option.__name__: ParserDefinition.from_dataclass(option) + option.__name__: ParserDefinition.from_dataclass( + option, parent_dataclasses | {cls} + ) for option in options }, ) diff --git a/example.py b/example.py deleted file mode 100644 index 8d1180d9..00000000 --- a/example.py +++ /dev/null @@ -1,42 +0,0 @@ -"""An argument parsing example. - -Note that there are multiple possible ways to document dataclass attributes, all of -which are supported by the automatic helptext generator. -""" - -import dataclasses -import enum - -import dcargs - - -class OptimizerType(enum.Enum): - ADAM = enum.auto() - SGD = enum.auto() - - -@dataclasses.dataclass -class OptimizerConfig: - # Variant of SGD to use. - type: OptimizerType - - # Learning rate to use. - learning_rate: float = 3e-4 - - # Coefficient for L2 regularization. - weight_decay: float = 1e-2 - - -@dataclasses.dataclass -class ExperimentConfig: - experiment_name: str # Experiment name to use. - - optimizer: OptimizerConfig - - seed: int = 0 - """Random seed. This is helpful for making sure that our experiments are - all reproducible!""" - - -config = dcargs.parse(ExperimentConfig, description=__doc__) -print(config) diff --git a/example_more_elaborate.py b/example_more_elaborate.py deleted file mode 100644 index a1680d71..00000000 --- a/example_more_elaborate.py +++ /dev/null @@ -1,47 +0,0 @@ -import dataclasses -import enum -from typing import Literal, Optional, Union - -import dcargs - -if __name__ == "__main__": - - class Color(enum.Enum): - RED = enum.auto() - GREEN = enum.auto() - BLUE = enum.auto() - - @dataclasses.dataclass - class A: - """Docstring for A.""" - - a: int - - @dataclasses.dataclass - class B: - """Docstring for B.""" - - b: int - bc: Union["B", "C"] - - @dataclasses.dataclass - class C: - """Docstring for C.""" - - c: float - - @dataclasses.dataclass - class Args: - """Arguments.""" - - w: Optional[float] - flag: bool - ab: Union[A, B] - a: A - number: Literal[1, 2, 5] # One of three numbers - color: Literal[Color.RED, Color.GREEN] - x_what: int = 5 - y: int = dataclasses.field(default_factory=lambda: 8) - - arg = dcargs.parse(Args) - arg.color diff --git a/setup.py b/setup.py index 46f0c9ee..3abe5e03 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,14 @@ from setuptools import find_packages, setup +with open("README.md", "r") as fh: + long_description = fh.read() + setup( name="dcargs", version="0.0", - description="Automagic CLIs with dataclasses", + description="Portable, reusable, strongly typed CLIs from dataclass definitions", + long_description=long_description, + long_description_content_type="text/markdown", url="http://github.com/brentyi/dcargs", author="brentyi", author_email="brentyi@berkeley.edu", @@ -11,7 +16,7 @@ packages=find_packages(), package_data={"dcargs": ["py.typed"]}, python_requires=">=3.7", - install_requires=[], + install_requires=["typing_extensions"], extras_require={ "testing": [ "pytest", diff --git a/tests/test_dcargs.py b/tests/test_dcargs.py new file mode 100644 index 00000000..c691253c --- /dev/null +++ b/tests/test_dcargs.py @@ -0,0 +1,129 @@ +import dataclasses +import io +from contextlib import redirect_stdout +from typing import Optional, Union + +import pytest +from typing_extensions import Literal # Python 3.7 compat + +import dcargs + + +def test_basic(): + @dataclasses.dataclass + class A: + x: int + + assert dcargs.parse(A, args=["--x", "5"]) == A(x=5) + + +def test_required(): + @dataclasses.dataclass + class A: + x: int + + with pytest.raises(SystemExit): + dcargs.parse(A, args=[]) + + +def test_flag(): + @dataclasses.dataclass + class A: + x: bool + + assert dcargs.parse(A, args=[]) == A(False) + assert dcargs.parse(A, args=["--x"]) == A(True) + + +def test_flag_inv(): + # This is a weird but currently expected behavior: the default values of boolean + # flags are ignored. Should think harder about how this is handled. + @dataclasses.dataclass + class A: + x: bool = True + + assert dcargs.parse(A, args=[]) == A(False) + assert dcargs.parse(A, args=["--x"]) == A(True) + + +def test_default(): + @dataclasses.dataclass + class A: + x: int = 5 + + assert dcargs.parse(A, args=[]) == A() + + +def test_optional(): + @dataclasses.dataclass + class A: + x: Optional[int] + + assert dcargs.parse(A, args=[]) == A(x=None) + + +def test_literal(): + @dataclasses.dataclass + class A: + x: Literal[0, 1, 2] + + assert dcargs.parse(A, args=["--x", "1"]) == A(x=1) + with pytest.raises(SystemExit): + assert dcargs.parse(A, args=["--x", "3"]) + + +def test_nested(): + @dataclasses.dataclass + class B: + y: int + + @dataclasses.dataclass + class A: + x: int + b: B + + assert dcargs.parse(A, args=["--x", "1", "--b-y", "3"]) == A(x=1, b=B(y=3)) + + +def test_subparser(): + @dataclasses.dataclass + class B: + y: int + + @dataclasses.dataclass + class C: + z: int + + @dataclasses.dataclass + class A: + x: int + bc: Union[B, C] + + assert dcargs.parse(A, args=["--x", "1", "B", "--y", "3"]) == A(x=1, bc=B(y=3)) + assert dcargs.parse(A, args=["--x", "1", "C", "--z", "3"]) == A(x=1, bc=C(z=3)) + + with pytest.raises(SystemExit): + dcargs.parse(A, args=["--x", "1", "B", "--z", "3"]) + with pytest.raises(SystemExit): + dcargs.parse(A, args=["--x", "1", "C", "--y", "3"]) + + +def test_helptext(): + @dataclasses.dataclass + class Helptext: + x: int # Documentation 1 + + # Documentation 2 + y: int + + z: int + """Documentation 3""" + + f = io.StringIO() + with pytest.raises(SystemExit): + with redirect_stdout(f): + dcargs.parse(Helptext, args=["--help"]) + helptext = f.getvalue() + assert "--x X Documentation 1 (int)" in helptext + assert "--y Y Documentation 2 (int)" in helptext + assert "--z Z Documentation 3 (int)" in helptext diff --git a/tests/test_forward_ref.py b/tests/test_forward_ref.py new file mode 100644 index 00000000..b1f84bb4 --- /dev/null +++ b/tests/test_forward_ref.py @@ -0,0 +1,54 @@ +import dataclasses +from typing import Union + +import pytest + +import dcargs + + +@dataclasses.dataclass +class A1: + x: int + bc: "Union[B, C]" + + +@dataclasses.dataclass +class A2: + x: int + bc: Union["B", "C"] + + +@dataclasses.dataclass +class B: + y: "int" + + +@dataclasses.dataclass +class C: + z: int + + +def test_forward_ref_1(): + + assert dcargs.parse(A1, args=["--x", "1", "B", "--y", "3"]) == A1(x=1, bc=B(y=3)) + assert dcargs.parse(A1, args=["--x", "1", "C", "--z", "3"]) == A1(x=1, bc=C(z=3)) + + with pytest.raises(SystemExit): + dcargs.parse(A1, args=["--x", "1", "B", "--z", "3"]) + with pytest.raises(SystemExit): + dcargs.parse(A1, args=["--x", "1", "C", "--y", "3"]) + + +def test_forward_ref_2(): + + assert dcargs.parse(A2, args=["--x", "1", "B", "--y", "3"]) == A2(x=1, bc=B(y=3)) + assert dcargs.parse(A2, args=["--x", "1", "C", "--z", "3"]) == A2(x=1, bc=C(z=3)) + + with pytest.raises(SystemExit): + dcargs.parse(A2, args=["--x", "1", "B", "--z", "3"]) + with pytest.raises(SystemExit): + dcargs.parse(A2, args=["--x", "1", "C", "--y", "3"]) + + +test_forward_ref_1() +test_forward_ref_2()