Skip to content

Commit

Permalink
Bump to v0.1.1 (#1)
Browse files — browse the repository at this point in the history
* Rename .html to .lxml
* use `lxml.html.fromstring` and `lxml.html.tostring` to process HTML
* use `lxml.etree.fromstring` and `lxml.etree.tostring` to process XML
* Add check_isort, check_black, check, check_all, fc: format_code into Makefile
* Update .travis.yml for linting check by isort and black
* Bump to v0.1.1
  • Loading branch information
linw1995 authored Apr 19, 2019
1 parent 503e4fb commit 4d40273
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 28 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ dmypy.json
.vscode
## Makefile
.black
.check_black
.check_isort
.develop
.flake
.isort
Expand Down
8 changes: 8 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ _helpers:

jobs:
include:
- <<: *_lint_base
name: Linting source code with isort
script:
- make check_isort
- <<: *_lint_base
name: Linting source code with black
script:
- make check_black
- <<: *_lint_base
name: Linting source code with flake8
script:
Expand Down
10 changes: 6 additions & 4 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Changelog

## v0.1.0
## v0.1.1

- initialize project
- add Extractor to extract data from text whose format is HTML or JSON.
- add complex extractor: Field, Item
- Rename `.html` to `.lxml`; remove the `fromstring` and `tostring` functions from `.lxml`
* Rename .html to .lxml
* use `lxml.html.fromstring` and `lxml.html.tostring` to process HTML
* use `lxml.etree.fromstring` and `lxml.etree.tostring` to process XML
- Add **check_isort**, **check_black**, **check**, **check_all**, and **fc** (short alias for **format_code**) targets to the Makefile for development.
5 changes: 5 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
## v0.1.0

- initialize project
- add Extractor to extract data from text whose format is HTML or JSON.
- add complex extractor: Field, Item
45 changes: 38 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ isort: .isort
isort -rc data_extractor tests
@touch .isort

check_isort: .check_isort

.check_isort: $(shell find data_extractor -type d) \
$(shell find tests -type d)
isort -rc -c data_extractor tests
@touch .check_isort

flake: .flake

.flake: $(shell find data_extractor -type d) \
Expand All @@ -53,30 +60,54 @@ black: .black
black data_extractor tests
@touch .black

check_black: .check_black

.check_black: $(shell find data_extractor -type d) \
$(shell find tests -type d)
black --check data_extractor tests
@touch .check_black

mypy: .mypy

.mypy: $(shell find data_extractor -type d)
mypy data_extractor
@touch .mypy

.develop: $(shell find data_extractor -type d) .isort .black .flake
@touch .develop
check: .check

.check: $(shell find data_extractor -type d) \
$(shell find tests -type d) \
.check_isort .check_black .flake

check_all: .check_all

.check_all: $(shell find data_extractor -type d) \
$(shell find tests -type d) \
.check mypy

format_code: .format_code
fc: .format_code

.format_code: $(shell find data_extractor -type d) \
$(shell find tests -type d) \
.isort .black

test: .develop
test: .check
pytest -q -x --ff --nf

vtest: .develop
vtest: .check
pytest -vv -x --ff --nf

cov: .develop
cov: .check
pytest -vv --cov=data_extractor
coverage html
@echo "open file://`pwd`/htmlcov/index.html to see coverage"

clean:
@rm -f .black
@rm -f .coverage
@rm -f .develop
@rm -f .check_isort
@rm -f .check_black
@rm -f .flake
@rm -f .isort
@rm -f .mypy
Expand All @@ -91,7 +122,7 @@ build:
python setup.py sdist bdist_wheel


.PHONY: all isort flake black mypy test vtest cov clean build
.PHONY: all check check_isort check_black fc flake black isort mypy test vtest cov clean build

endif
endif
2 changes: 1 addition & 1 deletion data_extractor/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.0"
__version__ = "0.1.1"
15 changes: 6 additions & 9 deletions data_extractor/html.py → data_extractor/lxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,19 @@
from typing import List, Union

# Third Party Library
from lxml.etree import _Element as HTMLElement
from lxml.html import fromstring, tostring
from lxml.etree import _Element as Element

# Local Folder
from .abc import AbstractExtractor


class CSSExtractor(AbstractExtractor):
def extract(self, element: HTMLElement) -> List[HTMLElement]:
def extract(self, element: Element) -> List[Element]:
return element.cssselect(self.expr)


class TextCSSExtractor(AbstractExtractor):
def extract(self, element: HTMLElement) -> List[str]:
def extract(self, element: Element) -> List[str]:
return [ele.text for ele in CSSExtractor(self.expr).extract(element)]


Expand All @@ -27,7 +26,7 @@ def __init__(self, expr: str, attr: str):
def __repr__(self) -> str:
return f"{self.__class__.__name__}(expr={self.expr!r}, attr={self.attr!r})"

def extract(self, root: HTMLElement) -> List[str]:
def extract(self, root: Element) -> List[str]:
return [
ele.get(self.attr)
for ele in CSSExtractor(self.expr).extract(root)
Expand All @@ -36,16 +35,14 @@ def extract(self, root: HTMLElement) -> List[str]:


class XPathExtractor(AbstractExtractor):
def extract(self, element: HTMLElement) -> Union[List["HTMLElement"], List[str]]:
def extract(self, element: Element) -> Union[List["Element"], List[str]]:
return element.xpath(self.expr)


__all__ = (
"AttrCSSExtractor",
"CSSExtractor",
"HTMLElement",
"Element",
"TextCSSExtractor",
"XPathExtractor",
"fromstring",
"tostring",
)
4 changes: 3 additions & 1 deletion tests/test_item.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Third Party Library
import pytest

from lxml.html import fromstring

# Dsipder Module
from data_extractor.html import XPathExtractor, fromstring
from data_extractor.item import Field, Item
from data_extractor.lxml import XPathExtractor


@pytest.fixture(scope="module")
Expand Down
9 changes: 3 additions & 6 deletions tests/test_html.py → tests/test_lxml.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
# Third Party Library
import pytest

from lxml.html import fromstring

# Dsipder Module
from data_extractor.html import (
AttrCSSExtractor,
TextCSSExtractor,
XPathExtractor,
fromstring,
)
from data_extractor.lxml import AttrCSSExtractor, TextCSSExtractor, XPathExtractor


@pytest.fixture(scope="module")
Expand Down

0 comments on commit 4d40273

Please sign in to comment.