diff --git a/.gitignore b/.gitignore index a657c83..e3b6ea0 100644 --- a/.gitignore +++ b/.gitignore @@ -134,6 +134,8 @@ dmypy.json .vscode ## Makefile .black +.check_black +.check_isort .develop .flake .isort diff --git a/.travis.yml b/.travis.yml index b8c1c75..a0908dc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -48,6 +48,14 @@ _helpers: jobs: include: + - <<: *_lint_base + name: Linting source code with isort + script: + - make check_isort + - <<: *_lint_base + name: Linting source code with black + script: + - make check_black - <<: *_lint_base name: Linting source code with flake8 script: diff --git a/CHANGES.md b/CHANGES.md index 9c19187..8d41ebe 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,7 +1,9 @@ # Changelog -## v0.1.0 +## v0.1.1 -- initialize project -- add Extractor to extract data from the text which format is HTML or JSON. -- add complex extractor: Field, Item +- Rename `.html` to `.lxml`; Remove `fromstring`, `tostring` function from `.lxml` + * Rename .html to .lxml + * use `lxml.html.fromstring` and `lxml.html.tostring` to process HTML + * use `lxml.etree.fromstring` and `lxml.etree.tostring` to process XML +- Add **check_isort**, **check_black**, **check**, **check_all**, **fc**: **format_code** into Makefile for development. diff --git a/HISTORY.md b/HISTORY.md index e69de29..451e905 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -0,0 +1,5 @@ +## v0.1.0 + +- initialize project +- add Extractor to extract data from the text which format is HTML or JSON. +- add complex extractor: Field, Item diff --git a/Makefile b/Makefile index 64d3a91..feb4443 100644 --- a/Makefile +++ b/Makefile @@ -39,6 +39,13 @@ isort: .isort isort -rc data_extractor tests @touch .isort +check_isort: .check_isort + +.check_isort: $(shell find data_extractor -type d) \ + $(shell find tests -type d) + isort -rc -c data_extractor tests + @touch .check_isort + flake: .flake .flake: $(shell find data_extractor -type d) \ @@ -53,22 +60,45 @@ black: .black black data_extractor tests @touch .black +check_black: .check_black + +.check_black: $(shell find data_extractor -type d) \ + $(shell find tests -type d) + black --check data_extractor tests + @touch .check_black + mypy: .mypy .mypy: $(shell find data_extractor -type d) mypy data_extractor @touch .mypy -.develop: $(shell find data_extractor -type d) .isort .black .flake - @touch .develop +check: .check + +.check: $(shell find data_extractor -type d) \ + $(shell find tests -type d) \ + .check_isort .check_black .flake + +check_all: .check_all + +.check_all: $(shell find data_extractor -type d) \ + $(shell find tests -type d) \ + .check mypy + +format_code: .format_code +fc: .format_code + +.format_code: $(shell find data_extractor -type d) \ + $(shell find tests -type d) \ + .isort .black -test: .develop +test: .check pytest -q -x --ff --nf -vtest: .develop +vtest: .check pytest -vv -x --ff --nf -cov: .develop +cov: .check pytest -vv --cov=data_extractor coverage html @echo "open file://`pwd`/htmlcov/index.html to see coverage" @@ -76,7 +106,8 @@ cov: .develop clean: @rm -f .black @rm -f .coverage - @rm -f .develop + @rm -f .check_isort + @rm -f .check_black @rm -f .flake @rm -f .isort @rm -f .mypy @@ -91,7 +122,7 @@ build: python setup.py sdist bdist_wheel -.PHONY: all isort flake black mypy test vtest cov clean build +.PHONY: all check check_isort check_black fc flake black isort mypy test vtest cov clean build endif endif diff --git a/data_extractor/__init__.py b/data_extractor/__init__.py index 3dc1f76..485f44a 100644 --- a/data_extractor/__init__.py +++ b/data_extractor/__init__.py @@ -1 +1 @@ -__version__ = "0.1.0" +__version__ = "0.1.1" diff --git a/data_extractor/html.py b/data_extractor/lxml.py similarity index 69% rename from data_extractor/html.py rename to data_extractor/lxml.py index af0fdc5..09d16cc 100644 --- a/data_extractor/html.py +++ b/data_extractor/lxml.py @@ -2,20 +2,19 @@ from typing import List, Union # Third Party Library -from lxml.etree import _Element as HTMLElement -from lxml.html import fromstring, tostring +from lxml.etree import _Element as Element # Local Folder from .abc import AbstractExtractor class CSSExtractor(AbstractExtractor): - def extract(self, element: HTMLElement) -> List[HTMLElement]: + def extract(self, element: Element) -> List[Element]: return element.cssselect(self.expr) class TextCSSExtractor(AbstractExtractor): - def extract(self, element: HTMLElement) -> List[str]: + def extract(self, element: Element) -> List[str]: return [ele.text for ele in CSSExtractor(self.expr).extract(element)] @@ -27,7 +26,7 @@ def __init__(self, expr: str, attr: str): def __repr__(self) -> str: return f"{self.__class__.__name__}(expr={self.expr!r}, attr={self.attr!r})" - def extract(self, root: HTMLElement) -> List[str]: + def extract(self, root: Element) -> List[str]: return [ ele.get(self.attr) for ele in CSSExtractor(self.expr).extract(root) @@ -36,16 +35,14 @@ def extract(self, root: HTMLElement) -> List[str]: class XPathExtractor(AbstractExtractor): - def extract(self, element: HTMLElement) -> Union[List["HTMLElement"], List[str]]: + def extract(self, element: Element) -> Union[List["Element"], List[str]]: return element.xpath(self.expr) __all__ = ( "AttrCSSExtractor", "CSSExtractor", - "HTMLElement", + "Element", "TextCSSExtractor", "XPathExtractor", - "fromstring", - "tostring", ) diff --git a/tests/test_item.py b/tests/test_item.py index 5e9d39a..e457446 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -1,9 +1,11 @@ # Third Party Library import pytest +from lxml.html import fromstring + # Dsipder Module -from data_extractor.html import XPathExtractor, fromstring from data_extractor.item import Field, Item +from data_extractor.lxml import XPathExtractor @pytest.fixture(scope="module") diff --git a/tests/test_html.py b/tests/test_lxml.py similarity index 97% rename from tests/test_html.py rename to tests/test_lxml.py index ee0d815..ca12f84 100644 --- a/tests/test_html.py +++ b/tests/test_lxml.py @@ -1,13 +1,10 @@ # Third Party Library import pytest +from lxml.html import fromstring + # Dsipder Module -from data_extractor.html import ( - AttrCSSExtractor, - TextCSSExtractor, - XPathExtractor, - fromstring, -) +from data_extractor.lxml import AttrCSSExtractor, TextCSSExtractor, XPathExtractor @pytest.fixture(scope="module")