diff --git a/pyproject.toml b/pyproject.toml index cd13636..9a2f292 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,10 +29,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] -packages = [ - {include = "rss_parser"}, - {include = "rss_parser/py.typed"}, -] +packages = [{ include = "rss_parser" }, { include = "rss_parser/py.typed" }] [tool.poetry.urls] @@ -54,7 +51,11 @@ rich = "*" pytest = "^7.4.0" [tool.pytest.ini_options] -addopts = "-color=yes" +addopts = "--color=yes" +testpaths = ["tests"] +log_cli = true +log_level = "INFO" + [tool.black] line-length = 120 @@ -65,36 +66,33 @@ line-length = 120 target-version = "py38" respect-gitignore = true select = [ - "PL", # pylint - "F", # pyflakes - "E", # pycodestyle errors - "W", # pycodestyle warnings - "I", # isort - "N", # pep8-naming - "S", # flake8-bandit - "A", # flake8-builtins - "C40", # flake8-comprehensions - "T10", # flake8-debugger - "EXE", # flake8-executable - "T20", # flake8-print - "TID", # flake8-tidy-imports - "TCH", # flake8-type-checking - "ARG", # flake8-unused-arguments - "RUF", # ruff + "PL", # pylint + "F", # pyflakes + "E", # pycodestyle errors + "W", # pycodestyle warnings + "I", # isort + "N", # pep8-naming + "S", # flake8-bandit + "A", # flake8-builtins + "C40", # flake8-comprehensions + "T10", # flake8-debugger + "EXE", # flake8-executable + "T20", # flake8-print + "TID", # flake8-tidy-imports + "TCH", # flake8-type-checking + "ARG", # flake8-unused-arguments + "RUF", # ruff ] [tool.ruff.per-file-ignores] "tests/**.py" = [ - "S101", # Use of assert detected - "ARG001", # Unused function argument - "S311", # Allow use of random -] -"**/__init__.py" = [ - "F401" -] -"rss_parser/models/atom/**" = [ - "A003" + "S101", # Use of assert detected + "ARG001", # Unused function argument + "S311", # Allow use of random + "S301", # Allow use of pickle ] +"**/__init__.py" = ["F401"] +"rss_parser/models/atom/**" = ["A003"] [build-system] diff --git a/rss_parser/models/__init__.py b/rss_parser/models/__init__.py index d7a5cfa..2de4b66 100644 --- a/rss_parser/models/__init__.py +++ b/rss_parser/models/__init__.py @@ -13,8 +13,6 @@ class XMLBaseModel(pydantic.BaseModel): class Config: - # Not really sure if we want for the schema obj to be immutable, disabling for now - # allow_mutation = False alias_generator = camel_case def json_plain(self, **kw): diff --git a/rss_parser/models/rss/channel.py b/rss_parser/models/rss/channel.py index d7fa336..6cd1329 100644 --- a/rss_parser/models/rss/channel.py +++ b/rss_parser/models/rss/channel.py @@ -36,7 +36,6 @@ class OptionalChannelElementsMixin(XMLBaseModel): copyright: Optional[Tag[str]] = None # Copyright 2002, Spartanburg Herald-Journal # noqa "Copyright notice for content in the channel." - managing_editor: Optional[Tag[str]] = None # geo@herald.com (George Matesky) "Email address for person responsible for editorial content." web_master: Optional[Tag[str]] = None # betty@herald.com (Betty Guernsey) diff --git a/rss_parser/models/rss/item.py b/rss_parser/models/rss/item.py index 72fbdbd..17da5f2 100644 --- a/rss_parser/models/rss/item.py +++ b/rss_parser/models/rss/item.py @@ -1,14 +1,18 @@ from typing import Optional from rss_parser.models import XMLBaseModel +from rss_parser.models.types.only_list import OnlyList from rss_parser.models.types.tag import Tag +from rss_parser.pydantic_proxy import import_v1_pydantic + +pydantic = import_v1_pydantic() class RequiredItemElementsMixin(XMLBaseModel): title: Tag[str] = None # Venice Film Festival Tries to Quit Sinking "The title of the item." - link: Tag[str] = None # http://nytimes.com/2004/12/07FEST.html + links: OnlyList[Tag[str]] = pydantic.Field(alias="link") # http://nytimes.com/2004/12/07FEST.html "The URL of the item." description: Tag[ @@ -28,8 +32,9 @@ class OptionalItemElementsMixin(XMLBaseModel): comments: Optional[Tag[str]] = None "URL of a page for comments relating to the item." - enclosure: Optional[Tag[str]] = None - "Describes a media object that is attached to the item." + enclosures: Optional[OnlyList[Tag[str]]] = pydantic.Field(alias="enclosure", default=[]) + # enclosure: Optional[OnlyList[Tag[str]]] = None + "Describes a media object that is attached to the item.\n" "Can be a list -> https://validator.w3.org/feed/docs/warning/DuplicateEnclosure.html" guid: Optional[Tag[str]] = None "A string that uniquely identifies the item." diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py index fd02e87..0746011 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,4 @@ -from json import loads +import pickle from pathlib import Path import pytest @@ -9,7 +9,12 @@ @pytest.fixture def sample_and_result(request): - with open(sample_dir / f"{request.param[0]}.xml", encoding="utf-8") as sample: - plain = len(request.param) > 1 and request.param[1] - with open(sample_dir / f"{request.param[0]}{'_plain' if plain else ''}.json", encoding="utf-8") as result: - return sample.read(), loads(result.read()) + sample_name = request.param[0] + + with open(sample_dir / sample_name / "data.xml", encoding="utf-8") as sample_file: + sample = sample_file.read() + + with open(sample_dir / sample_name / "result.pkl", "rb") as result_file: + result = pickle.load(result_file) + + return sample, result diff --git a/tests/samples/apology_line.json b/tests/samples/apology_line.json deleted file mode 100644 index c19d110..0000000 --- a/tests/samples/apology_line.json +++ /dev/null @@ -1,144 +0,0 @@ -{ - "channel": { - "attributes": {}, - "content": { - "category": null, - "cloud": null, - "copyright": { - "attributes": {}, - "content": "\u00a9 2021 Wondery, Inc. All rights reserved" - }, - "description": { - "attributes": {}, - "content": "

If you could call a number and say you\u2019re sorry, and no one would know\u2026what would you apologize for? For fifteen years, you could call a number in Manhattan and do just that. This is the story of the line, and the man at the other end who became consumed by his own creation. He was known as \u201cMr. Apology.\u201d As thousands of callers flooded the line, confessing to everything from shoplifting to infidelity, drug dealing to murder, Mr. Apology realized he couldn\u2019t just listen. He had to do something, even if it meant risking everything. From Wondery the makers of Dr. Death and The Shrink Next Door, comes a story about empathy, deception and obsession. Marissa Bridge, who knew Mr. Apology better than anyone, hosts this six episode series.

All episodes are available now. You can binge the series ad-free on Wondery+ or on Amazon Music with a Prime membership or Amazon Music Unlimited subscription.

" - }, - "docs": null, - "generator": { - "attributes": {}, - "content": "ART19" - }, - "image": { - "attributes": {}, - "content": { - "description": null, - "height": null, - "link": { - "attributes": {}, - "content": "https://wondery.com/shows/the-apology-line/?utm_source=rss" - }, - "title": { - "attributes": {}, - "content": "The Apology Line" - }, - "url": { - "attributes": {}, - "content": "https://content.production.cdn.art19.com/images/be/e1/82/c2/bee182c2-14b7-491b-b877-272ab6754025/bd4ab6d08d7b723678a682b6e399d26523245b3ba83f61617b9b28396aba1092b101cd86707576ec021b77e143b447463342b352f8825265b15310c989b6cb93.jpeg" - }, - "width": null - } - }, - "items": [ - { - "attributes": {}, - "content": { - "author": null, - "category": null, - "comments": null, - "description": { - "attributes": {}, - "content": "

When Elon Musk posted a video of himself arriving at Twitter HQ carrying a white sink along with the message \u201clet that sink in!\u201d It marked the end of a dramatic takeover. Musk had gone from Twitter critic to \u201cChief Twit\u201d in the space of just a few months but his arrival didn\u2019t put an end to questions about his motives. Musk had earned a reputation as a business maverick. From PayPal to Tesla to SpaceX, his name was synonymous with big, earth-shattering ideas. So, what did he want with a social media platform? And was this all really in the name of free speech...or was this all in the name of Elon Musk? 


From Wondery, the makers of WeCrashed and In God We Lust, comes the wild story of how the richest man alive took charge of the world\u2019s \u201cdigital public square.\u201d


Listen to Flipping The Bird: Wondery.fm/FTB_TAL

See Privacy Policy at https://art19.com/privacy and California Privacy Notice at https://art19.com/privacy#do-not-sell-my-info.

" - }, - "enclosure": { - "attributes": { - "length": "4824502", - "type": "audio/mpeg", - "url": "https://dts.podtrac.com/redirect.mp3/chrt.fm/track/9EE2G/pdst.fm/e/rss.art19.com/episodes/7bfce2e9-7889-480a-afde-46e810e82b1a.mp3?rss_browser=BAhJIhRweXRob24tcmVxdWVzdHMGOgZFVA%3D%3D--ac965bdf6559f894a935511702ea4ac963845aca" - }, - "content": null - }, - "guid": { - "attributes": { - "is_perma_link": "false" - }, - "content": "gid://art19-episode-locator/V0/tdroPC934g1_yKpnqnfmA67RAho9P0W6PUiIY-tBw3U" - }, - "link": null, - "pub_date": { - "attributes": {}, - "content": "Mon, 01 May 2023 08:00:00 -0000" - }, - "source": null, - "title": { - "attributes": {}, - "content": "Wondery Presents - Flipping The Bird: Elon vs Twitter" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": null, - "comments": null, - "description": { - "attributes": {}, - "content": "

If you could call a number and say you\u2019re sorry, and no one would know\u2026what would you apologize for? For fifteen years, you could call a number in Manhattan and do just that. This is the story of the line, and the man at the other end who became consumed by his own creation. He was known as \u201cMr. Apology.\u201d As thousands of callers flooded the line, confessing to everything from shoplifting to infidelity, drug dealing to murder, Mr. Apology realized he couldn\u2019t just listen. He had to do something, even if it meant risking everything. From Wondery the makers of Dr Death and The Shrink Next Door, comes a story about empathy, deception and obsession. Marissa Bridge, who knew Mr. Apology better than anyone, hosts this six episode series.

All episodes are available now. You can binge the series ad-free on Wondery+ or on Amazon Music with a Prime membership or Amazon Music Unlimited subscription.

See Privacy Policy at https://art19.com/privacy and California Privacy Notice at https://art19.com/privacy#do-not-sell-my-info.

" - }, - "enclosure": { - "attributes": { - "length": "2320091", - "type": "audio/mpeg", - "url": "https://dts.podtrac.com/redirect.mp3/chrt.fm/track/9EE2G/pdst.fm/e/rss.art19.com/episodes/a462e9fa-5e7b-4b0a-b992-d59fa1ca06cd.mp3?rss_browser=BAhJIhRweXRob24tcmVxdWVzdHMGOgZFVA%3D%3D--ac965bdf6559f894a935511702ea4ac963845aca" - }, - "content": null - }, - "guid": { - "attributes": { - "is_perma_link": "false" - }, - "content": "gid://art19-episode-locator/V0/2E7Nce-ZiX0Rmo017w7js5BvvKiOIMjWELujxOvJync" - }, - "link": null, - "pub_date": { - "attributes": {}, - "content": "Tue, 05 Jan 2021 03:26:59 -0000" - }, - "source": null, - "title": { - "attributes": {}, - "content": "Introducing: The Apology Line" - } - } - } - ], - "language": { - "attributes": {}, - "content": "en" - }, - "last_build_date": null, - "link": { - "attributes": {}, - "content": "https://wondery.com/shows/the-apology-line/?utm_source=rss" - }, - "managing_editor": { - "attributes": {}, - "content": "iwonder@wondery.com (Wondery)" - }, - "pub_date": null, - "rating": null, - "skip_days": null, - "skip_hours": null, - "text_input": null, - "title": { - "attributes": {}, - "content": "The Apology Line" - }, - "ttl": null, - "web_master": null - } - }, - "version": { - "attributes": {}, - "content": "2.0" - } -} \ No newline at end of file diff --git a/tests/samples/apology_line.xml b/tests/samples/apology_line/data.xml similarity index 98% rename from tests/samples/apology_line.xml rename to tests/samples/apology_line/data.xml index 84869ec..6f0b4d9 100644 --- a/tests/samples/apology_line.xml +++ b/tests/samples/apology_line/data.xml @@ -59,6 +59,7 @@ See Privacy Policy at https://art19.com/privacy and California Privacy Notice at Serial killer,TRUE CRIME,Society,This American Life,MURDER,Apology,Apology Line,Binge Worthy Documentary,New York City,Binge-worthy true crime,exhibit c 00:05:01 + https://wondery.com/shows/the-apology-line/?utm_source=rss Introducing: The Apology Line @@ -82,6 +83,7 @@ See Privacy Policy at https://art19.com/privacy and California Privacy Notice at Exhibit C,New York City,Murder,This American Life,society,serial killer,true crime,Apology Line,Binge Worthy Documentary ,Binge-worthy true crime,Apology 00:02:24 + https://wondery.com/shows/the-apology-line/?utm_source=rss diff --git a/tests/samples/apology_line/result.pkl b/tests/samples/apology_line/result.pkl new file mode 100644 index 0000000..f78ee40 Binary files /dev/null and b/tests/samples/apology_line/result.pkl differ diff --git a/tests/samples/apology_line_plain.json b/tests/samples/apology_line_plain.json deleted file mode 100644 index f55e13c..0000000 --- a/tests/samples/apology_line_plain.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "channel": { - "category": null, - "cloud": null, - "copyright": "\u00a9 2021 Wondery, Inc. All rights reserved", - "description": "

If you could call a number and say you\u2019re sorry, and no one would know\u2026what would you apologize for? For fifteen years, you could call a number in Manhattan and do just that. This is the story of the line, and the man at the other end who became consumed by his own creation. He was known as \u201cMr. Apology.\u201d As thousands of callers flooded the line, confessing to everything from shoplifting to infidelity, drug dealing to murder, Mr. Apology realized he couldn\u2019t just listen. He had to do something, even if it meant risking everything. From Wondery the makers of Dr. Death and The Shrink Next Door, comes a story about empathy, deception and obsession. Marissa Bridge, who knew Mr. Apology better than anyone, hosts this six episode series.

All episodes are available now. You can binge the series ad-free on Wondery+ or on Amazon Music with a Prime membership or Amazon Music Unlimited subscription.

", - "docs": null, - "generator": "ART19", - "image": { - "description": null, - "height": null, - "link": "https://wondery.com/shows/the-apology-line/?utm_source=rss", - "title": "The Apology Line", - "url": "https://content.production.cdn.art19.com/images/be/e1/82/c2/bee182c2-14b7-491b-b877-272ab6754025/bd4ab6d08d7b723678a682b6e399d26523245b3ba83f61617b9b28396aba1092b101cd86707576ec021b77e143b447463342b352f8825265b15310c989b6cb93.jpeg", - "width": null - }, - "items": [ - { - "author": null, - "category": null, - "comments": null, - "description": "

When Elon Musk posted a video of himself arriving at Twitter HQ carrying a white sink along with the message \u201clet that sink in!\u201d It marked the end of a dramatic takeover. Musk had gone from Twitter critic to \u201cChief Twit\u201d in the space of just a few months but his arrival didn\u2019t put an end to questions about his motives. Musk had earned a reputation as a business maverick. From PayPal to Tesla to SpaceX, his name was synonymous with big, earth-shattering ideas. So, what did he want with a social media platform? And was this all really in the name of free speech...or was this all in the name of Elon Musk? 


From Wondery, the makers of WeCrashed and In God We Lust, comes the wild story of how the richest man alive took charge of the world\u2019s \u201cdigital public square.\u201d


Listen to Flipping The Bird: Wondery.fm/FTB_TAL

See Privacy Policy at https://art19.com/privacy and California Privacy Notice at https://art19.com/privacy#do-not-sell-my-info.

", - "enclosure": null, - "guid": "gid://art19-episode-locator/V0/tdroPC934g1_yKpnqnfmA67RAho9P0W6PUiIY-tBw3U", - "link": null, - "pub_date": "Mon, 01 May 2023 08:00:00 -0000", - "source": null, - "title": "Wondery Presents - Flipping The Bird: Elon vs Twitter" - }, - { - "author": null, - "category": null, - "comments": null, - "description": "

If you could call a number and say you\u2019re sorry, and no one would know\u2026what would you apologize for? For fifteen years, you could call a number in Manhattan and do just that. This is the story of the line, and the man at the other end who became consumed by his own creation. He was known as \u201cMr. Apology.\u201d As thousands of callers flooded the line, confessing to everything from shoplifting to infidelity, drug dealing to murder, Mr. Apology realized he couldn\u2019t just listen. He had to do something, even if it meant risking everything. From Wondery the makers of Dr Death and The Shrink Next Door, comes a story about empathy, deception and obsession. Marissa Bridge, who knew Mr. Apology better than anyone, hosts this six episode series.

All episodes are available now. You can binge the series ad-free on Wondery+ or on Amazon Music with a Prime membership or Amazon Music Unlimited subscription.

See Privacy Policy at https://art19.com/privacy and California Privacy Notice at https://art19.com/privacy#do-not-sell-my-info.

", - "enclosure": null, - "guid": "gid://art19-episode-locator/V0/2E7Nce-ZiX0Rmo017w7js5BvvKiOIMjWELujxOvJync", - "link": null, - "pub_date": "Tue, 05 Jan 2021 03:26:59 -0000", - "source": null, - "title": "Introducing: The Apology Line" - } - ], - "language": "en", - "last_build_date": null, - "link": "https://wondery.com/shows/the-apology-line/?utm_source=rss", - "managing_editor": "iwonder@wondery.com (Wondery)", - "pub_date": null, - "rating": null, - "skip_days": null, - "skip_hours": null, - "text_input": null, - "title": "The Apology Line", - "ttl": null, - "web_master": null - }, - "version": "2.0" -} \ No newline at end of file diff --git a/tests/samples/atom.json b/tests/samples/atom.json deleted file mode 100644 index e67cb22..0000000 --- a/tests/samples/atom.json +++ /dev/null @@ -1,149 +0,0 @@ -{ - "feed": { - "attributes": {}, - "content": { - "authors": [], - "categories": [], - "contributors": [], - "entries": [ - { - "attributes": {}, - "content": { - "authors": [ - { - "attributes": {}, - "content": { - "email": { - "attributes": {}, - "content": "mail@example.com" - }, - "name": { - "attributes": {}, - "content": "John Doe" - }, - "uri": { - "attributes": {}, - "content": "http://example.org/" - } - } - } - ], - "categories": [], - "content": { - "attributes": { - "type": "xhtml", - "xml:base": "http://diveintomark.org/", - "xml:lang": "en" - }, - "content": "[Update: The Atom draft is finished.]" - }, - "contributors": [ - { - "attributes": {}, - "content": { - "email": null, - "name": { - "attributes": {}, - "content": "John Doe" - }, - "uri": null - } - } - ], - "id": { - "attributes": {}, - "content": "tag:example.org,2003:3.2397" - }, - "links": [ - { - "attributes": { - "href": "http://example.org/2005/04/02/atom", - "rel": "alternate", - "type": "text/html" - }, - "content": null - }, - { - "attributes": { - "href": "http://example.org/audio/ph34r_my_podcast.mp3", - "length": "1337", - "rel": "enclosure", - "type": "audio/mpeg" - }, - "content": null - } - ], - "published": { - "attributes": {}, - "content": "2003-12-13T08:29:29-04:00" - }, - "rights": null, - "source": null, - "summary": null, - "title": { - "attributes": {}, - "content": "Atom draft-07 snapshot" - }, - "updated": { - "attributes": {}, - "content": "2005-07-31T12:29:29+00:00" - } - } - } - ], - "generator": { - "attributes": { - "uri": "http://www.example.com/", - "version": "1.0" - }, - "content": "Example Toolkit" - }, - "icon": null, - "id": { - "attributes": {}, - "content": "tag:example.org,2003:3" - }, - "links": [ - { - "attributes": { - "href": "http://example.org/", - "hreflang": "en", - "rel": "alternate", - "type": "text/html" - }, - "content": null - }, - { - "attributes": { - "href": "http://example.org/feed.atom", - "rel": "self", - "type": "application/atom+xml" - }, - "content": null - } - ], - "logo": null, - "rights": { - "attributes": {}, - "content": "Copyright (c) 2003, John Doe" - }, - "subtitle": { - "attributes": { - "type": "html" - }, - "content": "A lot of effort\n went into making this effortless" - }, - "title": { - "attributes": { - "type": "text" - }, - "content": "Title" - }, - "updated": { - "attributes": {}, - "content": "2005-07-31T12:29:29+00:00" - } - } - }, - "version": null -} \ No newline at end of file diff --git a/tests/samples/atom.xml b/tests/samples/atom/data.xml similarity index 100% rename from tests/samples/atom.xml rename to tests/samples/atom/data.xml diff --git a/tests/samples/atom/result.pkl b/tests/samples/atom/result.pkl new file mode 100644 index 0000000..11a703b Binary files /dev/null and b/tests/samples/atom/result.pkl differ diff --git a/tests/samples/atom_plain.json b/tests/samples/atom_plain.json deleted file mode 100644 index 67ab469..0000000 --- a/tests/samples/atom_plain.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "feed": { - "authors": [], - "categories": [], - "contributors": [], - "entries": [ - { - "authors": [ - { - "email": "mail@example.com", - "name": "John Doe", - "uri": "http://example.org/" - } - ], - "categories": [], - "content": "[Update: The Atom draft is finished.]", - "contributors": [ - { - "email": null, - "name": "John Doe", - "uri": null - } - ], - "id": "tag:example.org,2003:3.2397", - "links": [ - null, - null - ], - "published": "2003-12-13T08:29:29-04:00", - "rights": null, - "source": null, - "summary": null, - "title": "Atom draft-07 snapshot", - "updated": "2005-07-31T12:29:29+00:00" - } - ], - "generator": "Example Toolkit", - "icon": null, - "id": "tag:example.org,2003:3", - "links": [ - null, - null - ], - "logo": null, - "rights": "Copyright (c) 2003, John Doe", - "subtitle": "A lot of effort\n went into making this effortless", - "title": "Title", - "updated": "2005-07-31T12:29:29+00:00" - }, - "version": null -} \ No newline at end of file diff --git a/tests/samples/custom.xml b/tests/samples/custom.xml deleted file mode 100644 index d050924..0000000 --- a/tests/samples/custom.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - Custom tag data - \ No newline at end of file diff --git a/tests/samples/generic_atom_feed.json b/tests/samples/generic_atom_feed.json deleted file mode 100644 index 9ff4c81..0000000 --- a/tests/samples/generic_atom_feed.json +++ /dev/null @@ -1,252 +0,0 @@ -{ - "feed": { - "attributes": { - "xmlns": "http://www.w3.org/2005/Atom" - }, - "content": { - "authors": [ - { - "attributes": {}, - "content": { - "email": null, - "name": { - "attributes": {}, - "content": "FYIcenter.com" - }, - "uri": null - } - } - ], - "categories": [ - { - "attributes": { - "term": "Programming" - }, - "content": null - }, - { - "attributes": { - "term": "Computer" - }, - "content": null - }, - { - "attributes": { - "term": "Developer" - }, - "content": null - } - ], - "contributors": [], - "entries": [ - { - "attributes": {}, - "content": { - "authors": [ - { - "attributes": {}, - "content": { - "email": null, - "name": { - "attributes": {}, - "content": "FYIcenter.com" - }, - "uri": null - } - } - ], - "categories": [ - { - "attributes": { - "term": "Microsoft" - }, - "content": null - } - ], - "content": null, - "contributors": [], - "id": { - "attributes": {}, - "content": "http://dev.fyicenter.com/1000702_Use_Developer_Portal_Internally.html" - }, - "links": [ - { - "attributes": { - "href": "http://dev.fyicenter.com/1000702_Use_Developer_Portal_Internally.ht ml", - "rel": "alternate" - }, - "content": null - } - ], - "published": null, - "rights": null, - "source": null, - "summary": { - "attributes": { - "type": "html" - }, - "content": "How to \nuse the Developer Portal internally by you as the publisher? Normally, \nthe Developer Portal of an Azure API Management Service is used by \nclient developers. But as a publisher, you can also use the Developer \nPortal to test API operations internally. You can follow this tutorial \nto access the ... - Rank: 120; Updated: 2017-09-20 13:29:06 -> Source" - }, - "title": { - "attributes": {}, - "content": "Use Developer Portal Internally" - }, - "updated": { - "attributes": {}, - "content": "2017-09-20T13:29:08+02:00" - } - } - }, - { - "attributes": {}, - "content": { - "authors": [ - { - "attributes": {}, - "content": { - "email": null, - "name": { - "attributes": {}, - "content": "FYIcenter.com" - }, - "uri": null - } - } - ], - "categories": [ - { - "attributes": { - "term": "Microsoft" - }, - "content": null - } - ], - "content": null, - "contributors": [], - "id": { - "attributes": {}, - "content": "http://dev.fyicenter.com/1000701_Using_Azure_API_Management_Developer\n_Portal.html" - }, - "links": [ - { - "attributes": { - "href": "http://dev.fyicenter.com/1000701_Using_Azure_API_Management_Develop er_Portal.html", - "rel": "alternate" - }, - "content": null - } - ], - "published": null, - "rights": null, - "source": null, - "summary": { - "attributes": { - "type": "html" - }, - "content": "Where to \nfind tutorials on Using Azure API Management Developer Portal? Here is \na list of tutorials to answer many frequently asked questions compiled \nby FYIcenter.com team on Using Azure API Management Developer Portal: \nUse Developer Portal Internally What Can I See on Developer Portal What \nI You T... - Rank: 120; Updated: 2017-09-20 13:29:06 -> Source" - }, - "title": { - "attributes": {}, - "content": "Using Azure API Management Developer Portal" - }, - "updated": { - "attributes": {}, - "content": "2017-09-20T13:29:07+02:00" - } - } - }, - { - "attributes": {}, - "content": { - "authors": [ - { - "attributes": {}, - "content": { - "email": null, - "name": { - "attributes": {}, - "content": "FYIcenter.com" - }, - "uri": null - } - } - ], - "categories": [ - { - "attributes": { - "term": "Microsoft" - }, - "content": null - } - ], - "content": null, - "contributors": [], - "id": { - "attributes": {}, - "content": "http://dev.fyicenter.com/1000700_Add_API_to_API_Products.html" - }, - "links": [ - { - "attributes": { - "href": "http://dev.fyicenter.com/1000700_Add_API_to_API_Products.html", - "rel": "alternate" - }, - "content": null - } - ], - "published": null, - "rights": null, - "source": null, - "summary": { - "attributes": { - "type": "html" - }, - "content": "How to \nadd an API to an API product for internal testing on the Publisher \nPortal of an Azure API Management Service? You can follow this tutorial \nto add an API to an API product on the Publisher Portal of an Azure API \nManagement Service. 1. Click API from the left menu on the Publisher \nPortal. You s... - Rank: 119; Updated: 2017-09-20 13:29:06 -> Sour\nce" - }, - "title": { - "attributes": {}, - "content": "Add API to API Products" - }, - "updated": { - "attributes": {}, - "content": "2017-09-20T13:29:06+02:00" - } - } - } - ], - "generator": null, - "icon": null, - "id": { - "attributes": {}, - "content": "http://dev.fyicenter.com/atom_xml.php" - }, - "links": [ - { - "attributes": { - "href": "http://dev.fyicenter.com/atom_xml.php", - "rel": "self" - }, - "content": null - } - ], - "logo": null, - "rights": { - "attributes": {}, - "content": "Copyright (c) 2017 FYIcenter.com" - }, - "subtitle": { - "attributes": {}, - "content": "FYI (For Your Information) Center for Software Developers with \nlarge collection of FAQs, tutorials and tips codes for application and \nwWeb developers on Java, .NET, C, PHP, JavaScript, XML, HTML, CSS, RSS, \nMySQL and Oracle - dev.fyicenter.com." - }, - "title": { - "attributes": {}, - "content": "FYI Center for Software Developers" - }, - "updated": { - "attributes": {}, - "content": "2017-09-22T03:58:52+02:00" - } - } - }, - "version": null -} \ No newline at end of file diff --git a/tests/samples/generic_atom_feed.xml b/tests/samples/generic_atom_feed/data.xml similarity index 100% rename from tests/samples/generic_atom_feed.xml rename to tests/samples/generic_atom_feed/data.xml diff --git a/tests/samples/generic_atom_feed/result.pkl b/tests/samples/generic_atom_feed/result.pkl new file mode 100644 index 0000000..e1cd354 Binary files /dev/null and b/tests/samples/generic_atom_feed/result.pkl differ diff --git a/tests/samples/generic_atom_feed_plain.json b/tests/samples/generic_atom_feed_plain.json deleted file mode 100644 index 097f9cf..0000000 --- a/tests/samples/generic_atom_feed_plain.json +++ /dev/null @@ -1,103 +0,0 @@ -{ - "feed": { - "authors": [ - { - "email": null, - "name": "FYIcenter.com", - "uri": null - } - ], - "categories": [ - null, - null, - null - ], - "contributors": [], - "entries": [ - { - "authors": [ - { - "email": null, - "name": "FYIcenter.com", - "uri": null - } - ], - "categories": [ - null - ], - "content": null, - "contributors": [], - "id": "http://dev.fyicenter.com/1000702_Use_Developer_Portal_Internally.html", - "links": [ - null - ], - "published": null, - "rights": null, - "source": null, - "summary": "How to \nuse the Developer Portal internally by you as the publisher? Normally, \nthe Developer Portal of an Azure API Management Service is used by \nclient developers. But as a publisher, you can also use the Developer \nPortal to test API operations internally. You can follow this tutorial \nto access the ... - Rank: 120; Updated: 2017-09-20 13:29:06 -> Source", - "title": "Use Developer Portal Internally", - "updated": "2017-09-20T13:29:08+02:00" - }, - { - "authors": [ - { - "email": null, - "name": "FYIcenter.com", - "uri": null - } - ], - "categories": [ - null - ], - "content": null, - "contributors": [], - "id": "http://dev.fyicenter.com/1000701_Using_Azure_API_Management_Developer\n_Portal.html", - "links": [ - null - ], - "published": null, - "rights": null, - "source": null, - "summary": "Where to \nfind tutorials on Using Azure API Management Developer Portal? Here is \na list of tutorials to answer many frequently asked questions compiled \nby FYIcenter.com team on Using Azure API Management Developer Portal: \nUse Developer Portal Internally What Can I See on Developer Portal What \nI You T... - Rank: 120; Updated: 2017-09-20 13:29:06 -> Source", - "title": "Using Azure API Management Developer Portal", - "updated": "2017-09-20T13:29:07+02:00" - }, - { - "authors": [ - { - "email": null, - "name": "FYIcenter.com", - "uri": null - } - ], - "categories": [ - null - ], - "content": null, - "contributors": [], - "id": "http://dev.fyicenter.com/1000700_Add_API_to_API_Products.html", - "links": [ - null - ], - "published": null, - "rights": null, - "source": null, - "summary": "How to \nadd an API to an API product for internal testing on the Publisher \nPortal of an Azure API Management Service? You can follow this tutorial \nto add an API to an API product on the Publisher Portal of an Azure API \nManagement Service. 1. Click API from the left menu on the Publisher \nPortal. You s... - Rank: 119; Updated: 2017-09-20 13:29:06 -> Sour\nce", - "title": "Add API to API Products", - "updated": "2017-09-20T13:29:06+02:00" - } - ], - "generator": null, - "icon": null, - "id": "http://dev.fyicenter.com/atom_xml.php", - "links": [ - null - ], - "logo": null, - "rights": "Copyright (c) 2017 FYIcenter.com", - "subtitle": "FYI (For Your Information) Center for Software Developers with \nlarge collection of FAQs, tutorials and tips codes for application and \nwWeb developers on Java, .NET, C, PHP, JavaScript, XML, HTML, CSS, RSS, \nMySQL and Oracle - dev.fyicenter.com.", - "title": "FYI Center for Software Developers", - "updated": "2017-09-22T03:58:52+02:00" - }, - "version": null -} \ No newline at end of file diff --git a/tests/samples/github-49/data.xml b/tests/samples/github-49/data.xml new file mode 100644 index 0000000..1a06060 --- /dev/null +++ b/tests/samples/github-49/data.xml @@ -0,0 +1,2109 @@ + + + + www.rbc.ru + https://www.rbc.ru + + 30 + ru + + http://pics.rbc.ru/img/fp_v4/skin/img/v6-logo.png + https://www.rbc.ru + Главные новости :: www.rbc.ru + + + <![CDATA[Чешский футболист пропустит Евро из-за падения с велосипеда]]> + https://www.rbc.ru/sport/09/06/2024/666592289a79475b6a241776 + Sun, 09 Jun 2024 14:42:31 +0300 + + Спорт + Анна Сатдинова + rssexport.rbc.ru:sport:666592289a79475b6a241776 + + 14:42:31 + 09.06.2024 + https://www.rbc.ru/sport/09/06/2024/666592289a79475b6a241776 + + 666592289a79475b6a241776 + article + 1717933351 + Sun, 09 Jun 2024 14:42:33 +0300 + sport + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/3/15/347179326576153.png + image/png + original + copyright + + Михал Садилек

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/0/82/347178424573820.jpeg + image/jpeg + original + limited + + Килиан Мбаппе

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/4/03/347178342040034.jpeg + image/jpeg + original + limited + + Аркадиуш Милик

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/25/347178314474255.jpeg + image/jpeg + original + limited + + Михаил Мудрик

]]>
+
+ +
+
+ + <![CDATA[В хоккейном СКА отреагировали на пожар на домашней арене]]> + https://www.rbc.ru/sport/09/06/2024/66658b889a794752eee761e9 + Sun, 09 Jun 2024 14:31:45 +0300 + + Спорт + Анна Сатдинова + rssexport.rbc.ru:sport:66658b889a794752eee761e9 + + 14:31:45 + 09.06.2024 + https://www.rbc.ru/sport/09/06/2024/66658b889a794752eee761e9 + + 66658b889a794752eee761e9 + article + 1717932705 + Sun, 09 Jun 2024 14:31:47 +0300 + sport + + + + + https://s0.rbk.ru/v6_top_pics/media/img/1/67/347179310741671.png + image/png + original + copyright + + «СКА Арена» в Санкт-Петербурге

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/1/86/347177547708861.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/74/347079323125745.jpeg + image/jpeg + original + limited + + «СКА Арена»

]]>
+
+ + + + +
+
+ + <![CDATA[Парковки по всей Москве сделают бесплатными в День России]]> + https://www.rbc.ru/rbcfreenews/66658e8b9a7947bf5129beda + Sun, 09 Jun 2024 14:27:10 +0300 + + Общество + rssexport.rbc.ru:society:66658e8b9a7947bf5129beda + + 14:27:10 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/66658e8b9a7947bf5129beda + + 66658e8b9a7947bf5129beda + short_news + 1717932430 + Sun, 09 Jun 2024 14:56:05 +0300 + society + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/6/92/347179332501926.jpeg + image/jpeg + original + copyright + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/6/26/347158669327266.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/2/00/347142231644002.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/6/65/347140246903656.jpeg + image/jpeg + original + limited + + + + + + + <![CDATA[В Белгороде и Курской области объявили ракетную опасность]]> + https://www.rbc.ru/rbcfreenews/66658f8f9a79473cc364f058 + Sun, 09 Jun 2024 14:24:59 +0300 + + Политика + rssexport.rbc.ru:politics:66658f8f9a79473cc364f058 + + 14:24:59 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/66658f8f9a79473cc364f058 + + 66658f8f9a79473cc364f058 + short_news + 1717932299 + Sun, 09 Jun 2024 14:52:11 +0300 + politics + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/4/76/347179329546764.jpeg + image/jpeg + original + copyright + + Белгород

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/2/40/347179255944402.jpeg + image/jpeg + original + limited + + Белгород

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/8/74/347179234584748.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/90/347179180733905.jpeg + image/jpeg + original + limited + + Воронеж

]]>
+
+ +
+
+ + <![CDATA[Гладков сообщил об атаке двух дронов на село Безымено]]> + https://www.rbc.ru/rbcfreenews/66658c0a9a794744d3f3efde + Sun, 09 Jun 2024 14:20:11 +0300 + + Политика + rssexport.rbc.ru:politics:66658c0a9a794744d3f3efde + + 14:20:11 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/66658c0a9a794744d3f3efde + + 66658c0a9a794744d3f3efde + short_news + 1717932011 + Sun, 09 Jun 2024 14:50:28 +0300 + politics + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/2/86/347179315001862.jpeg + image/jpeg + original + copyright + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/2/40/347179255944402.jpeg + image/jpeg + original + limited + + Белгород

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/8/74/347179234584748.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/90/347179180733905.jpeg + image/jpeg + original + limited + + Воронеж

]]>
+
+ +
+
+ + <![CDATA[Минздрав Газы сообщил о 274 погибших в ходе операции ЦАХАЛ в Нусейрате]]> + https://www.rbc.ru/politics/09/06/2024/6665872f9a794708d5a2ffe4 + Sun, 09 Jun 2024 14:12:05 +0300 + + Политика + Илья Пламенев + rssexport.rbc.ru:politics:6665872f9a794708d5a2ffe4 + + 14:12:05 + 09.06.2024 + https://www.rbc.ru/politics/09/06/2024/6665872f9a794708d5a2ffe4 + + 6665872f9a794708d5a2ffe4 + article + 1717931525 + Sun, 09 Jun 2024 14:18:40 +0300 + politics + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/8/62/347179305621628.jpeg + image/jpeg + original + copyright + + Нейсурат, сектор Газа

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/1/14/347179010398141.jpeg + image/jpeg + original + limited + + + + + + + + + + +
+ + <![CDATA[На греческом острове нашли тело британского телеведущего Мосли]]> + https://www.rbc.ru/rbcfreenews/666581469a79474b50b4c902 + Sun, 09 Jun 2024 14:03:23 +0300 + + Политика + rssexport.rbc.ru:politics:666581469a79474b50b4c902 + + 14:03:23 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/666581469a79474b50b4c902 + + 666581469a79474b50b4c902 + short_news + 1717931003 + Sun, 09 Jun 2024 14:07:45 +0300 + politics + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/7/85/347179291561857.jpeg + image/jpeg + original + copyright + + Обстановка на месте происшествия на острове Сими, Греция

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/9/47/347177862568479.jpeg + image/jpeg + original + limited + + Майкл Мосли

]]>
+
+ +
+
+ + <![CDATA[Marca узнала о планах «Реала» купить самого дорогого немецкого футболиста]]> + https://www.rbc.ru/sport/09/06/2024/6665858a9a7947709634d767 + Sun, 09 Jun 2024 13:59:14 +0300 + + Спорт + Анна Сатдинова + rssexport.rbc.ru:sport:6665858a9a7947709634d767 + + 13:59:14 + 09.06.2024 + https://www.rbc.ru/sport/09/06/2024/6665858a9a7947709634d767 + + 6665858a9a7947709634d767 + article + 1717930754 + Sun, 09 Jun 2024 14:16:13 +0300 + sport + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/7/94/347179307185947.png + image/png + original + copyright + + Флориан Вирц

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/7/79/347175838185797.jpeg + image/jpeg + original + limited + + Футболисты ФК «Жирона»

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/9/75/347160438326759.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/26/347163878988265.jpeg + image/jpeg + original + limited + + Трофей Лиги Европы

]]>
+
+ +
+
+ + <![CDATA[Большинство немцев выступили за возвращение обязательного призыва в армию]]> + https://www.rbc.ru/politics/09/06/2024/66657daf9a7947174df08354 + Sun, 09 Jun 2024 13:53:29 +0300 + + Политика + Илья Пламенев + rssexport.rbc.ru:politics:66657daf9a7947174df08354 + + 13:53:29 + 09.06.2024 + https://www.rbc.ru/politics/09/06/2024/66657daf9a7947174df08354 + + 66657daf9a7947174df08354 + article + 1717930409 + Sun, 09 Jun 2024 14:00:14 +0300 + politics + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/3/58/347179281459583.jpeg + image/jpeg + original + copyright + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/0/59/347179263271590.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/2/90/347176011297902.jpeg + image/jpeg + original + limited + + Борис Писториус

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/3/87/347081866152873.jpeg + image/jpeg + original + limited + + Борис Писториус

]]>
+
+ +
+
+ + <![CDATA[В Херсонской области объявили авиационную опасность]]> + https://www.rbc.ru/rbcfreenews/666586ee9a7947ce6ee276fb + Sun, 09 Jun 2024 13:45:37 +0300 + + Политика + rssexport.rbc.ru:politics:666586ee9a7947ce6ee276fb + + 13:45:37 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/666586ee9a7947ce6ee276fb + + 666586ee9a7947ce6ee276fb + short_news + 1717929937 + Sun, 09 Jun 2024 14:03:15 +0300 + politics + + + + + https://s0.rbk.ru/v6_top_pics/media/img/9/51/347179303402519.jpeg + image/jpeg + original + copyright + + Херсон

]]>
+
+ + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/6/87/347178532990876.jpeg + image/jpeg + original + limited + + Обстановка в селе Садовое, 7 июля 2024 г.

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/4/46/347178355570464.jpeg + image/jpeg + original + limited + + + +
+
+ + <![CDATA[Минобороны сообщило о сбитом украинском вертолете Ми-8]]> + https://www.rbc.ru/rbcfreenews/666584719a79473e002ab642 + Sun, 09 Jun 2024 13:36:12 +0300 + + Политика + rssexport.rbc.ru:politics:666584719a79473e002ab642 + + 13:36:12 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/666584719a79473e002ab642 + + 666584719a79473e002ab642 + short_news + 1717929372 + Sun, 09 Jun 2024 13:55:01 +0300 + politics + + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/1/98/347179300588981.jpeg + image/jpeg + original + copyright + + Вертолёт Ми-8

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/8/78/347179254085788.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/4/36/347178457849364.jpeg + image/jpeg + original + limited + + Вертолёт Ми-8

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/6/29/347178376874296.jpeg + image/jpeg + original + limited + + + +
+
+ + <![CDATA[Будущий игрок «Реала» Эндрик повторил достижение Пеле в сборной Бразилии]]> + https://www.rbc.ru/sport/09/06/2024/66657dcc9a794797be6910e8 + Sun, 09 Jun 2024 13:29:42 +0300 + + Спорт + Анна Сатдинова + rssexport.rbc.ru:sport:66657dcc9a794797be6910e8 + + + 13:29:42 + 09.06.2024 + https://www.rbc.ru/sport/09/06/2024/66657dcc9a794797be6910e8 + + 66657dcc9a794797be6910e8 + article + 1717928982 + Sun, 09 Jun 2024 13:36:59 +0300 + sport + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/8/69/347179274026698.png + image/png + original + copyright + + Эндрик (в центре)

]]>
+
+ + https://img.youtube.com/vi/d89n5rTI-q4/0.jpg + image/jpeg + original + copyright + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/9/81/347115319717819.jpeg + image/jpeg + original + limited + + Эндрик

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/3/38/347019316692383.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/7/17/347157660389177.png + image/png + original + limited + + Джон Кеннеди

]]>
+
+ +
+
+ + <![CDATA[Орбан рассказал, от чего зависит мир на Украине]]> + https://www.rbc.ru/rbcfreenews/66657b489a794731ffe7280a + Sun, 09 Jun 2024 13:19:30 +0300 + + Политика + rssexport.rbc.ru:politics:66657b489a794731ffe7280a + + 13:19:30 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/66657b489a794731ffe7280a + + 66657b489a794731ffe7280a + short_news + 1717928370 + Sun, 09 Jun 2024 13:27:05 +0300 + politics + + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/2/36/347179274146362.jpeg + image/jpeg + original + copyright + + Виктор Орбан

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/15/347178955131155.jpeg + image/jpeg + original + limited + + Виктор Орбан

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/2/27/346841457775272.jpg + image/jpeg + original + limited + + Марчин Пшидач

]]>
+
+ + + + +
+
+ + <![CDATA[В Минобороны Украины назвали число женщин в рядах ВСУ]]> + https://www.rbc.ru/politics/09/06/2024/6665736d9a7947fffe234918 + Sun, 09 Jun 2024 13:09:35 +0300 + + Политика + Юлия Овчинникова + rssexport.rbc.ru:politics:6665736d9a7947fffe234918 + + 13:09:35 + 09.06.2024 + https://www.rbc.ru/politics/09/06/2024/6665736d9a7947fffe234918 + + 6665736d9a7947fffe234918 + article + 1717927775 + Sun, 09 Jun 2024 13:21:15 +0300 + politics + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/8/78/347179254085788.jpeg + image/jpeg + original + copyright + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/3/45/347045423790453.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/6/01/347034285362016.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/7/97/347172567331977.jpeg + image/jpeg + original + limited + + + + + + + <![CDATA[В Миргороде прогремели взрывы]]> + https://www.rbc.ru/politics/09/06/2024/666579f39a79471a2aa3bb18 + Sun, 09 Jun 2024 13:04:35 +0300 + + Политика + Илья Пламенев + rssexport.rbc.ru:politics:666579f39a79471a2aa3bb18 + + 13:04:35 + 09.06.2024 + https://www.rbc.ru/politics/09/06/2024/666579f39a79471a2aa3bb18 + + 666579f39a79471a2aa3bb18 + article + 1717927475 + Sun, 09 Jun 2024 13:54:10 +0300 + politics + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/0/23/347179272525230.jpeg + image/jpeg + original + copyright + + + + + + + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/3/48/347179117697483.jpeg + image/jpeg + original + limited + + + + + + + <![CDATA[Уроженец Дагестана Имавов победил американца в главном бою турнира UFC]]> + https://www.rbc.ru/sport/09/06/2024/666576bf9a794710b7db95ad + Sun, 09 Jun 2024 12:58:17 +0300 + + Спорт + Анна Сатдинова + rssexport.rbc.ru:sport:666576bf9a794710b7db95ad + + 12:58:17 + 09.06.2024 + https://www.rbc.ru/sport/09/06/2024/666576bf9a794710b7db95ad + + 666576bf9a794710b7db95ad + article + 1717927097 + Sun, 09 Jun 2024 13:07:13 +0300 + sport + + + + + https://s0.rbk.ru/v6_top_pics/media/img/6/78/347179267448786.png + image/png + original + copyright + + Нассурдин Имавов

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/0/60/347175745717600.jpeg + image/jpeg + original + limited + + Чарльз Оливейра

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/7/27/347176585073277.jpeg + image/jpeg + original + limited + + Дастин Порье и Ислам Махачев

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/88/347173202481885.png + image/png + original + limited + + Дональд Трамп

]]>
+
+ +
+
+ + <![CDATA[Захарова заявила о «кознях Запада» перед ПМЭФ]]> + https://www.rbc.ru/rbcfreenews/6665767e9a794710b7db95a8 + Sun, 09 Jun 2024 12:55:00 +0300 + + Политика + rssexport.rbc.ru:politics:6665767e9a794710b7db95a8 + + 12:55:00 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/6665767e9a794710b7db95a8 + + 6665767e9a794710b7db95a8 + short_news + 1717926900 + Sun, 09 Jun 2024 13:03:51 +0300 + politics + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/2/68/347179269365682.jpeg + image/jpeg + original + copyright + + Мария Захарова

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/2/13/347178455295132.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/68/347177650436685.jpeg + image/jpeg + original + limited + + Владимир Путин и Александр Беглов

]]>
+
+ + + + +
+
+ + <![CDATA[В Германии решили увеличить число резервистов]]> + https://www.rbc.ru/politics/09/06/2024/666570559a794747ceae4f9c + Sun, 09 Jun 2024 12:52:02 +0300 + + Политика + Илья Пламенев + rssexport.rbc.ru:politics:666570559a794747ceae4f9c + + 12:52:02 + 09.06.2024 + https://www.rbc.ru/politics/09/06/2024/666570559a794747ceae4f9c + + 666570559a794747ceae4f9c + article + 1717926722 + Sun, 09 Jun 2024 13:03:56 +0300 + politics + + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/0/59/347179263271590.jpeg + image/jpeg + original + copyright + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/4/47/347177893251474.jpeg + image/jpeg + original + limited + + Пусковая установка зенитно-ракетного комплекса Patriot

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/0/17/347176779986170.jpeg + image/jpeg + original + limited + + Олаф Шольц

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/2/90/347176011297902.jpeg + image/jpeg + original + limited + + Борис Писториус

]]>
+
+ +
+
+ + <![CDATA[Гладков рассказал о последствиях атаки на Шебекино]]> + https://www.rbc.ru/rbcfreenews/666574689a7947fc21cd7a6a + Sun, 09 Jun 2024 12:30:57 +0300 + + Политика + rssexport.rbc.ru:politics:666574689a7947fc21cd7a6a + + 12:30:57 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/666574689a7947fc21cd7a6a + + 666574689a7947fc21cd7a6a + short_news + 1717925457 + Sun, 09 Jun 2024 12:37:07 +0300 + politics + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/3/26/347179258202263.jpeg + image/jpeg + original + copyright + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/8/74/347179234584748.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/90/347179180733905.jpeg + image/jpeg + original + limited + + Воронеж

]]>
+
+ + + + +
+
+ + <![CDATA[Жителей Белгорода во второй раз за день предупредили о ракетной опасности]]> + https://www.rbc.ru/rbcfreenews/6665750c9a794710b7db959e + Sun, 09 Jun 2024 12:29:05 +0300 + + Политика + rssexport.rbc.ru:politics:6665750c9a794710b7db959e + + 12:29:05 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/6665750c9a794710b7db959e + + 6665750c9a794710b7db959e + short_news + 1717925345 + Sun, 09 Jun 2024 12:33:36 +0300 + politics + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/2/40/347179255944402.jpeg + image/jpeg + original + copyright + + Белгород

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/8/74/347179234584748.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/90/347179180733905.jpeg + image/jpeg + original + limited + + Воронеж

]]>
+
+ + + + +
+
+ + <![CDATA[На петербургской «СКА Арене» за ₽60 млрд произошел пожар]]> + https://www.rbc.ru/sport/09/06/2024/66656e3c9a794764ba47759b + Sun, 09 Jun 2024 12:24:07 +0300 + + Спорт + Анна Сатдинова + rssexport.rbc.ru:sport:66656e3c9a794764ba47759b + + 12:24:07 + 09.06.2024 + https://www.rbc.ru/sport/09/06/2024/66656e3c9a794764ba47759b + + 66656e3c9a794764ba47759b + article + 1717925047 + Sun, 09 Jun 2024 12:29:00 +0300 + sport + + + + + https://s0.rbk.ru/v6_top_pics/media/img/0/96/347179243148960.png + image/png + original + copyright + + Матч звезд КХЛ на «СКА Арене»

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/3/38/347151035455383.jpeg + image/jpeg + original + limited + + Диего Коста

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/7/37/347089463952377.jpeg + image/jpeg + original + limited + + Евгений Плющенко

]]>
+
+ + + + +
+
+ + <![CDATA[Россияне получили восемь медалей на Азиатской олимпиаде по физике]]> + https://www.rbc.ru/rbcfreenews/66656f199a794764ba4775a1 + Sun, 09 Jun 2024 12:18:46 +0300 + + Общество + rssexport.rbc.ru:society:66656f199a794764ba4775a1 + + 12:18:46 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/66656f199a794764ba4775a1 + + 66656f199a794764ba4775a1 + short_news + 1717924726 + Sun, 09 Jun 2024 12:25:40 +0300 + society + + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/98/347179249039985.jpeg + image/jpeg + original + copyright + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/1/95/347177360462951.jpeg + image/jpeg + original + limited + + Мирра Андреева

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/6/67/347176799049676.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/76/347174196322765.jpeg + image/jpeg + original + limited + + + +
+
+ + <![CDATA[Times узнала, что легенда велоспорта может потерять трофеи из-за долгов]]> + https://www.rbc.ru/sport/09/06/2024/666564dc9a794773c50876ab + Sun, 09 Jun 2024 12:12:11 +0300 + + Спорт + Анна Сатдинова + rssexport.rbc.ru:sport:666564dc9a794773c50876ab + + 12:12:11 + 09.06.2024 + https://www.rbc.ru/sport/09/06/2024/666564dc9a794773c50876ab + + 666564dc9a794773c50876ab + article + 1717924331 + Sun, 09 Jun 2024 12:23:17 +0300 + sport + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/7/65/347179210481657.png + image/png + original + copyright + + Брэдли Уиггинс

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/2/03/347176686995032.jpeg + image/jpeg + original + limited + + Александр Власов

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/8/08/347105822152088.jpeg + image/jpeg + original + limited + + Христос Воликакис (в белом)

]]>
+
+ + + + +
+
+ + <![CDATA[Ефимов рассказал о редевелопменте почти 2 тыс. га бывших промзон в Москве]]> + https://www.rbc.ru/rbcfreenews/66633e709a7947818686c01f + Sun, 09 Jun 2024 12:08:02 +0300 + + Экономика + rssexport.rbc.ru:economics:66633e709a7947818686c01f + + 12:08:02 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/66633e709a7947818686c01f + + 66633e709a7947818686c01f + short_news + 1717924082 + Sun, 09 Jun 2024 13:27:09 +0300 + economics + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/3/38/347177801323383.jpeg + image/jpeg + original + copyright + + Владимир Ефимов

]]>
+
+
+ + <![CDATA[Умер бывший директор ЗИЛа Валерий Сайкин]]> + https://www.rbc.ru/politics/09/06/2024/666569c99a79473c35ec8aa4 + Sun, 09 Jun 2024 12:06:35 +0300 + + Политика + Илья Пламенев + rssexport.rbc.ru:politics:666569c99a79473c35ec8aa4 + + 12:06:35 + 09.06.2024 + https://www.rbc.ru/politics/09/06/2024/666569c99a79473c35ec8aa4 + + 666569c99a79473c35ec8aa4 + article + 1717923995 + Sun, 09 Jun 2024 12:14:39 +0300 + politics + + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/3/67/347179238466673.jpeg + image/jpeg + original + copyright + + Валерий Сайкин, 1987 г.

]]>
+
+
+ + <![CDATA[Сеул возобновит трансляции на границе с КНДР в ответ на «мусорную войну»]]> + https://www.rbc.ru/rbcfreenews/666566869a7947faaadbf0be + Sun, 09 Jun 2024 11:54:35 +0300 + + Политика + rssexport.rbc.ru:politics:666566869a7947faaadbf0be + + 11:54:35 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/666566869a7947faaadbf0be + + 666566869a7947faaadbf0be + short_news + 1717923275 + Sun, 09 Jun 2024 12:01:05 +0300 + politics + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/4/78/347179231116784.jpeg + image/jpeg + original + copyright + + + + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/8/99/347176563805998.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/1/36/347173988907361.jpeg + image/jpeg + original + limited + + + + + + + <![CDATA[Над Белгородской областью сбили ракету «Нептун» и два дрона]]> + https://www.rbc.ru/rbcfreenews/66656bfb9a794747ceae4f91 + Sun, 09 Jun 2024 11:52:22 +0300 + + Политика + rssexport.rbc.ru:politics:66656bfb9a794747ceae4f91 + + 11:52:22 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/66656bfb9a794747ceae4f91 + + 66656bfb9a794747ceae4f91 + short_news + 1717923142 + Sun, 09 Jun 2024 11:57:46 +0300 + politics + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/8/74/347179234584748.jpeg + image/jpeg + original + copyright + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/90/347179180733905.jpeg + image/jpeg + original + limited + + Воронеж

]]>
+
+ + + + + + + +
+
+ + <![CDATA[«Вкусно — и точка» передумала открывать рестораны в Абхазии]]> + https://www.rbc.ru/business/09/06/2024/666563739a7947752bc72774 + Sun, 09 Jun 2024 11:45:13 +0300 + + Бизнес + Илья Пламенев + rssexport.rbc.ru:business:666563739a7947752bc72774 + + 11:45:13 + 09.06.2024 + https://www.rbc.ru/business/09/06/2024/666563739a7947752bc72774 + + 666563739a7947752bc72774 + article + 1717922713 + Sun, 09 Jun 2024 11:54:26 +0300 + business + + + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/7/78/347179220201787.jpeg + image/jpeg + original + copyright + + + + + + + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/0/37/347176711656370.jpeg + image/jpeg + original + limited + + + + + + + <![CDATA[На юге Азербайджана двух пограничников убило ударом молнии]]> + https://www.rbc.ru/rbcfreenews/666563279a7947752bc7276f + Sun, 09 Jun 2024 11:21:46 +0300 + + Общество + rssexport.rbc.ru:society:666563279a7947752bc7276f + + 11:21:46 + 09.06.2024 + https://www.rbc.ru/rbcfreenews/666563279a7947752bc7276f + + 666563279a7947752bc7276f + short_news + 1717921306 + Sun, 09 Jun 2024 11:42:59 +0300 + society + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/6/64/347179215340646.jpeg + image/jpeg + original + copyright + + + + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/44/347171816956445.jpeg + image/jpeg + original + limited + + + + + + + + + + <![CDATA[Welt узнала о поиске Макроном союзников для отправки военных на Украину]]> + https://www.rbc.ru/politics/09/06/2024/6665590d9a794782e847f34c + Sun, 09 Jun 2024 11:13:25 +0300 + + Политика + Илья Пламенев + rssexport.rbc.ru:politics:6665590d9a794782e847f34c + + 11:13:25 + 09.06.2024 + https://www.rbc.ru/politics/09/06/2024/6665590d9a794782e847f34c + + 6665590d9a794782e847f34c + article + 1717920805 + Sun, 09 Jun 2024 11:49:58 +0300 + politics + + + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/6/06/347179195427066.jpeg + image/jpeg + original + copyright + + Эмманюэль Макрон

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/6/11/347177862057116.jpeg + image/jpeg + original + limited + + + + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/34/347176768866345.jpeg + image/jpeg + original + limited + + Владимир Зеленский

]]>
+
+ + + + + https://s0.rbk.ru/v6_top_pics/media/img/5/09/347178878144095.jpeg + image/jpeg + original + limited + + + +
+
+
+
\ No newline at end of file diff --git a/tests/samples/github-49/result.pkl b/tests/samples/github-49/result.pkl new file mode 100644 index 0000000..3f598b7 Binary files /dev/null and b/tests/samples/github-49/result.pkl differ diff --git a/tests/samples/rss_2.json b/tests/samples/rss_2.json deleted file mode 100644 index efe8fa3..0000000 --- a/tests/samples/rss_2.json +++ /dev/null @@ -1,413 +0,0 @@ -{ - "channel": { - "attributes": {}, - "content": { - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "cloud": null, - "copyright": { - "attributes": {}, - "content": "Copyright 2004 NotePage, Inc." - }, - "description": { - "attributes": {}, - "content": "RSS is a fascinating technology. The uses for RSS are expanding daily. Take a closer look at how various industries are using the benefits of RSS in their businesses." - }, - "docs": { - "attributes": {}, - "content": "http://blogs.law.harvard.edu/tech/rss" - }, - "generator": { - "attributes": {}, - "content": "FeedForAll Beta1 (0.0.1.8)" - }, - "image": { - "attributes": {}, - "content": { - "description": { - "attributes": {}, - "content": "FeedForAll Sample Feed" - }, - "height": { - "attributes": {}, - "content": 48 - }, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/industry-solutions.htm" - }, - "title": { - "attributes": {}, - "content": "FeedForAll Sample Feed" - }, - "url": { - "attributes": {}, - "content": "http://www.feedforall.com/ffalogo48x48.gif" - }, - "width": { - "attributes": {}, - "content": 48 - } - } - }, - "items": [ - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Restaurant's communicate with customers. Let your customers know the latest specials or events.
\n
\nRSS feed uses include:
\nDaily Specials
\nEntertainment
\nCalendar of Events
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/restaurant.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:09:11 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Restaurants" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Educational Institutions communicate with students about school wide activities, events, and schedules.
\n
\nRSS feed uses include:
\nHomework Assignments
\nSchool Cancellations
\nCalendar of Events
\nSports Scores
\nClubs/Organization Meetings
\nLunches Menus
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/schools.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:09:09 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Schools and Colleges" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Computer Service Companies communicate with clients about cyber security and related issues.
\n
\nUses include:
\nCyber Security Alerts
\nSpecials
\nJob Postings
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/computer-service.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:09:07 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Computer Service Companies" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Governments communicate with the general public about positions on various issues, and keep the community aware of changes in important legislative issues.
\n

\nRSS uses Include:
\nLegislative Calendar
\nVotes
\nBulletins
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/government.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:09:05 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Governments" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Politicians communicate with the general public about positions on various issues, and keep the community notified of their schedule.
\n
\nUses Include:
\nBlogs
\nSpeaking Engagements
\nStatements
\n
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/politics.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:09:03 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Politicians" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Meteorologists communicate with the general public about storm warnings and weather alerts, in specific regions. Using RSS meteorologists are able to quickly disseminate urgent and life threatening weather warnings.
\n
\nUses Include:
\nWeather Alerts
\nPlotting Storms
\nSchool Cancellations
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/weather.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:09:01 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Meteorologists" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Realtors and Real Estate companies communicate with clients informing them of newly available properties, and open house announcements. RSS helps to reach a targeted audience and spread the word in an inexpensive, professional manner.
\n

\nFeeds can be used for:
\nOpen House Dates
\nNew Properties For Sale
\nMortgage Rates
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/real-estate.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:08:59 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Realtors & Real Estate Firms" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Banks, Credit Unions and Mortgage companies communicate with the general public about rate changes in a prompt and professional manner.
\n
\nUses include:
\nMortgage Rates
\nForeign Exchange Rates
\nBank Rates
\nSpecials
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/banks.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:08:57 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Banks / Mortgage Companies" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Law Enforcement Professionals communicate with the general public and other agencies in a prompt and efficient manner. Using RSS police are able to quickly disseminate urgent and life threatening information.
\n
\nUses include:
\nAmber Alerts
\nSex Offender Community Notification
\nWeather Alerts
\nScheduling
\nSecurity Alerts
\nPolice Report
\nMeetings
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/law-enforcement.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:08:56 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Law Enforcement" - } - } - } - ], - "language": { - "attributes": {}, - "content": "en-us" - }, - "last_build_date": { - "attributes": {}, - "content": "2004-10-19T13:39:14-04:00" - }, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/industry-solutions.htm" - }, - "managing_editor": { - "attributes": {}, - "content": "marketing@feedforall.com" - }, - "pub_date": { - "attributes": {}, - "content": "2004-10-19T13:38:55-04:00" - }, - "rating": null, - "skip_days": null, - "skip_hours": null, - "text_input": null, - "title": { - "attributes": {}, - "content": "FeedForAll Sample Feed" - }, - "ttl": null, - "web_master": { - "attributes": {}, - "content": "webmaster@feedforall.com" - } - } - }, - "version": { - "attributes": {}, - "content": "2.0" - } -} \ No newline at end of file diff --git a/tests/samples/rss_2.xml b/tests/samples/rss_2/data.xml similarity index 100% rename from tests/samples/rss_2.xml rename to tests/samples/rss_2/data.xml diff --git a/tests/samples/rss_2/result.pkl b/tests/samples/rss_2/result.pkl new file mode 100644 index 0000000..2d6549e Binary files /dev/null and b/tests/samples/rss_2/result.pkl differ diff --git a/tests/samples/rss_2_no_category_attr.json b/tests/samples/rss_2_no_category_attr.json deleted file mode 100644 index d32566a..0000000 --- a/tests/samples/rss_2_no_category_attr.json +++ /dev/null @@ -1,409 +0,0 @@ -{ - "channel": { - "attributes": {}, - "content": { - "category": { - "attributes": {}, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "cloud": null, - "copyright": { - "attributes": {}, - "content": "Copyright 2004 NotePage, Inc." - }, - "description": { - "attributes": {}, - "content": "RSS is a fascinating technology. The uses for RSS are expanding daily. Take a closer look at how various industries are using the benefits of RSS in their businesses." - }, - "docs": { - "attributes": {}, - "content": "http://blogs.law.harvard.edu/tech/rss" - }, - "generator": { - "attributes": {}, - "content": "FeedForAll Beta1 (0.0.1.8)" - }, - "image": { - "attributes": {}, - "content": { - "description": { - "attributes": {}, - "content": "FeedForAll Sample Feed" - }, - "height": { - "attributes": {}, - "content": 48 - }, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/industry-solutions.htm" - }, - "title": { - "attributes": {}, - "content": "FeedForAll Sample Feed" - }, - "url": { - "attributes": {}, - "content": "http://www.feedforall.com/ffalogo48x48.gif" - }, - "width": { - "attributes": {}, - "content": 48 - } - } - }, - "items": [ - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": {}, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Restaurant's communicate with customers. Let your customers know the latest specials or events.
\n
\nRSS feed uses include:
\nDaily Specials
\nEntertainment
\nCalendar of Events
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/restaurant.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:09:11 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Restaurants" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Educational Institutions communicate with students about school wide activities, events, and schedules.
\n
\nRSS feed uses include:
\nHomework Assignments
\nSchool Cancellations
\nCalendar of Events
\nSports Scores
\nClubs/Organization Meetings
\nLunches Menus
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/schools.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:09:09 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Schools and Colleges" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Computer Service Companies communicate with clients about cyber security and related issues.
\n
\nUses include:
\nCyber Security Alerts
\nSpecials
\nJob Postings
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/computer-service.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:09:07 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Computer Service Companies" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Governments communicate with the general public about positions on various issues, and keep the community aware of changes in important legislative issues.
\n

\nRSS uses Include:
\nLegislative Calendar
\nVotes
\nBulletins
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/government.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:09:05 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Governments" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Politicians communicate with the general public about positions on various issues, and keep the community notified of their schedule.
\n
\nUses Include:
\nBlogs
\nSpeaking Engagements
\nStatements
\n
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/politics.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:09:03 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Politicians" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Meteorologists communicate with the general public about storm warnings and weather alerts, in specific regions. Using RSS meteorologists are able to quickly disseminate urgent and life threatening weather warnings.
\n
\nUses Include:
\nWeather Alerts
\nPlotting Storms
\nSchool Cancellations
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/weather.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:09:01 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Meteorologists" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Realtors and Real Estate companies communicate with clients informing them of newly available properties, and open house announcements. RSS helps to reach a targeted audience and spread the word in an inexpensive, professional manner.
\n

\nFeeds can be used for:
\nOpen House Dates
\nNew Properties For Sale
\nMortgage Rates
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/real-estate.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:08:59 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Realtors & Real Estate Firms" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Banks, Credit Unions and Mortgage companies communicate with the general public about rate changes in a prompt and professional manner.
\n
\nUses include:
\nMortgage Rates
\nForeign Exchange Rates
\nBank Rates
\nSpecials
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/banks.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:08:57 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Banks / Mortgage Companies" - } - } - }, - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Law Enforcement Professionals communicate with the general public and other agencies in a prompt and efficient manner. Using RSS police are able to quickly disseminate urgent and life threatening information.
\n
\nUses include:
\nAmber Alerts
\nSex Offender Community Notification
\nWeather Alerts
\nScheduling
\nSecurity Alerts
\nPolice Report
\nMeetings
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/law-enforcement.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:08:56 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Law Enforcement" - } - } - } - ], - "language": { - "attributes": {}, - "content": "en-us" - }, - "last_build_date": { - "attributes": {}, - "content": "2004-10-19T13:39:14-04:00" - }, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/industry-solutions.htm" - }, - "managing_editor": { - "attributes": {}, - "content": "marketing@feedforall.com" - }, - "pub_date": { - "attributes": {}, - "content": "2004-10-19T13:38:55-04:00" - }, - "rating": null, - "skip_days": null, - "skip_hours": null, - "text_input": null, - "title": { - "attributes": {}, - "content": "FeedForAll Sample Feed" - }, - "ttl": null, - "web_master": { - "attributes": {}, - "content": "webmaster@feedforall.com" - } - } - }, - "version": { - "attributes": {}, - "content": "2.0" - } -} \ No newline at end of file diff --git a/tests/samples/rss_2_no_category_attr.xml b/tests/samples/rss_2_no_category_attr/data.xml similarity index 100% rename from tests/samples/rss_2_no_category_attr.xml rename to tests/samples/rss_2_no_category_attr/data.xml diff --git a/tests/samples/rss_2_no_category_attr/result.pkl b/tests/samples/rss_2_no_category_attr/result.pkl new file mode 100644 index 0000000..55292f7 Binary files /dev/null and b/tests/samples/rss_2_no_category_attr/result.pkl differ diff --git a/tests/samples/rss_2_no_category_attr_plain.json b/tests/samples/rss_2_no_category_attr_plain.json deleted file mode 100644 index d404ff4..0000000 --- a/tests/samples/rss_2_no_category_attr_plain.json +++ /dev/null @@ -1,141 +0,0 @@ -{ - "channel": { - "category": "Computers/Software/Internet/Site Management/Content Management", - "cloud": null, - "copyright": "Copyright 2004 NotePage, Inc.", - "description": "RSS is a fascinating technology. The uses for RSS are expanding daily. Take a closer look at how various industries are using the benefits of RSS in their businesses.", - "docs": "http://blogs.law.harvard.edu/tech/rss", - "generator": "FeedForAll Beta1 (0.0.1.8)", - "image": { - "description": "FeedForAll Sample Feed", - "height": 48, - "link": "http://www.feedforall.com/industry-solutions.htm", - "title": "FeedForAll Sample Feed", - "url": "http://www.feedforall.com/ffalogo48x48.gif", - "width": 48 - }, - "items": [ - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Restaurant's communicate with customers. Let your customers know the latest specials or events.
\n
\nRSS feed uses include:
\nDaily Specials
\nEntertainment
\nCalendar of Events
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/restaurant.htm", - "pub_date": "Tue, 19 Oct 2004 11:09:11 -0400", - "source": null, - "title": "RSS Solutions for Restaurants" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Educational Institutions communicate with students about school wide activities, events, and schedules.
\n
\nRSS feed uses include:
\nHomework Assignments
\nSchool Cancellations
\nCalendar of Events
\nSports Scores
\nClubs/Organization Meetings
\nLunches Menus
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/schools.htm", - "pub_date": "Tue, 19 Oct 2004 11:09:09 -0400", - "source": null, - "title": "RSS Solutions for Schools and Colleges" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Computer Service Companies communicate with clients about cyber security and related issues.
\n
\nUses include:
\nCyber Security Alerts
\nSpecials
\nJob Postings
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/computer-service.htm", - "pub_date": "Tue, 19 Oct 2004 11:09:07 -0400", - "source": null, - "title": "RSS Solutions for Computer Service Companies" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Governments communicate with the general public about positions on various issues, and keep the community aware of changes in important legislative issues.
\n

\nRSS uses Include:
\nLegislative Calendar
\nVotes
\nBulletins
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/government.htm", - "pub_date": "Tue, 19 Oct 2004 11:09:05 -0400", - "source": null, - "title": "RSS Solutions for Governments" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Politicians communicate with the general public about positions on various issues, and keep the community notified of their schedule.
\n
\nUses Include:
\nBlogs
\nSpeaking Engagements
\nStatements
\n
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/politics.htm", - "pub_date": "Tue, 19 Oct 2004 11:09:03 -0400", - "source": null, - "title": "RSS Solutions for Politicians" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Meteorologists communicate with the general public about storm warnings and weather alerts, in specific regions. Using RSS meteorologists are able to quickly disseminate urgent and life threatening weather warnings.
\n
\nUses Include:
\nWeather Alerts
\nPlotting Storms
\nSchool Cancellations
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/weather.htm", - "pub_date": "Tue, 19 Oct 2004 11:09:01 -0400", - "source": null, - "title": "RSS Solutions for Meteorologists" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Realtors and Real Estate companies communicate with clients informing them of newly available properties, and open house announcements. RSS helps to reach a targeted audience and spread the word in an inexpensive, professional manner.
\n

\nFeeds can be used for:
\nOpen House Dates
\nNew Properties For Sale
\nMortgage Rates
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/real-estate.htm", - "pub_date": "Tue, 19 Oct 2004 11:08:59 -0400", - "source": null, - "title": "RSS Solutions for Realtors & Real Estate Firms" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Banks, Credit Unions and Mortgage companies communicate with the general public about rate changes in a prompt and professional manner.
\n
\nUses include:
\nMortgage Rates
\nForeign Exchange Rates
\nBank Rates
\nSpecials
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/banks.htm", - "pub_date": "Tue, 19 Oct 2004 11:08:57 -0400", - "source": null, - "title": "RSS Solutions for Banks / Mortgage Companies" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Law Enforcement Professionals communicate with the general public and other agencies in a prompt and efficient manner. Using RSS police are able to quickly disseminate urgent and life threatening information.
\n
\nUses include:
\nAmber Alerts
\nSex Offender Community Notification
\nWeather Alerts
\nScheduling
\nSecurity Alerts
\nPolice Report
\nMeetings
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/law-enforcement.htm", - "pub_date": "Tue, 19 Oct 2004 11:08:56 -0400", - "source": null, - "title": "RSS Solutions for Law Enforcement" - } - ], - "language": "en-us", - "last_build_date": "2004-10-19T13:39:14-04:00", - "link": "http://www.feedforall.com/industry-solutions.htm", - "managing_editor": "marketing@feedforall.com", - "pub_date": "2004-10-19T13:38:55-04:00", - "rating": null, - "skip_days": null, - "skip_hours": null, - "text_input": null, - "title": "FeedForAll Sample Feed", - "ttl": null, - "web_master": "webmaster@feedforall.com" - }, - "version": "2.0" -} \ No newline at end of file diff --git a/tests/samples/rss_2_plain.json b/tests/samples/rss_2_plain.json deleted file mode 100644 index d404ff4..0000000 --- a/tests/samples/rss_2_plain.json +++ /dev/null @@ -1,141 +0,0 @@ -{ - "channel": { - "category": "Computers/Software/Internet/Site Management/Content Management", - "cloud": null, - "copyright": "Copyright 2004 NotePage, Inc.", - "description": "RSS is a fascinating technology. The uses for RSS are expanding daily. Take a closer look at how various industries are using the benefits of RSS in their businesses.", - "docs": "http://blogs.law.harvard.edu/tech/rss", - "generator": "FeedForAll Beta1 (0.0.1.8)", - "image": { - "description": "FeedForAll Sample Feed", - "height": 48, - "link": "http://www.feedforall.com/industry-solutions.htm", - "title": "FeedForAll Sample Feed", - "url": "http://www.feedforall.com/ffalogo48x48.gif", - "width": 48 - }, - "items": [ - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Restaurant's communicate with customers. Let your customers know the latest specials or events.
\n
\nRSS feed uses include:
\nDaily Specials
\nEntertainment
\nCalendar of Events
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/restaurant.htm", - "pub_date": "Tue, 19 Oct 2004 11:09:11 -0400", - "source": null, - "title": "RSS Solutions for Restaurants" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Educational Institutions communicate with students about school wide activities, events, and schedules.
\n
\nRSS feed uses include:
\nHomework Assignments
\nSchool Cancellations
\nCalendar of Events
\nSports Scores
\nClubs/Organization Meetings
\nLunches Menus
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/schools.htm", - "pub_date": "Tue, 19 Oct 2004 11:09:09 -0400", - "source": null, - "title": "RSS Solutions for Schools and Colleges" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Computer Service Companies communicate with clients about cyber security and related issues.
\n
\nUses include:
\nCyber Security Alerts
\nSpecials
\nJob Postings
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/computer-service.htm", - "pub_date": "Tue, 19 Oct 2004 11:09:07 -0400", - "source": null, - "title": "RSS Solutions for Computer Service Companies" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Governments communicate with the general public about positions on various issues, and keep the community aware of changes in important legislative issues.
\n

\nRSS uses Include:
\nLegislative Calendar
\nVotes
\nBulletins
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/government.htm", - "pub_date": "Tue, 19 Oct 2004 11:09:05 -0400", - "source": null, - "title": "RSS Solutions for Governments" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Politicians communicate with the general public about positions on various issues, and keep the community notified of their schedule.
\n
\nUses Include:
\nBlogs
\nSpeaking Engagements
\nStatements
\n
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/politics.htm", - "pub_date": "Tue, 19 Oct 2004 11:09:03 -0400", - "source": null, - "title": "RSS Solutions for Politicians" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Meteorologists communicate with the general public about storm warnings and weather alerts, in specific regions. Using RSS meteorologists are able to quickly disseminate urgent and life threatening weather warnings.
\n
\nUses Include:
\nWeather Alerts
\nPlotting Storms
\nSchool Cancellations
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/weather.htm", - "pub_date": "Tue, 19 Oct 2004 11:09:01 -0400", - "source": null, - "title": "RSS Solutions for Meteorologists" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Realtors and Real Estate companies communicate with clients informing them of newly available properties, and open house announcements. RSS helps to reach a targeted audience and spread the word in an inexpensive, professional manner.
\n

\nFeeds can be used for:
\nOpen House Dates
\nNew Properties For Sale
\nMortgage Rates
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/real-estate.htm", - "pub_date": "Tue, 19 Oct 2004 11:08:59 -0400", - "source": null, - "title": "RSS Solutions for Realtors & Real Estate Firms" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Banks, Credit Unions and Mortgage companies communicate with the general public about rate changes in a prompt and professional manner.
\n
\nUses include:
\nMortgage Rates
\nForeign Exchange Rates
\nBank Rates
\nSpecials
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/banks.htm", - "pub_date": "Tue, 19 Oct 2004 11:08:57 -0400", - "source": null, - "title": "RSS Solutions for Banks / Mortgage Companies" - }, - { - "author": null, - "category": "Computers/Software/Internet/Site Management/Content Management", - "comments": "http://www.feedforall.com/forum", - "description": "FeedForAll helps Law Enforcement Professionals communicate with the general public and other agencies in a prompt and efficient manner. Using RSS police are able to quickly disseminate urgent and life threatening information.
\n
\nUses include:
\nAmber Alerts
\nSex Offender Community Notification
\nWeather Alerts
\nScheduling
\nSecurity Alerts
\nPolice Report
\nMeetings
", - "enclosure": null, - "guid": null, - "link": "http://www.feedforall.com/law-enforcement.htm", - "pub_date": "Tue, 19 Oct 2004 11:08:56 -0400", - "source": null, - "title": "RSS Solutions for Law Enforcement" - } - ], - "language": "en-us", - "last_build_date": "2004-10-19T13:39:14-04:00", - "link": "http://www.feedforall.com/industry-solutions.htm", - "managing_editor": "marketing@feedforall.com", - "pub_date": "2004-10-19T13:38:55-04:00", - "rating": null, - "skip_days": null, - "skip_hours": null, - "text_input": null, - "title": "FeedForAll Sample Feed", - "ttl": null, - "web_master": "webmaster@feedforall.com" - }, - "version": "2.0" -} \ No newline at end of file diff --git a/tests/samples/rss_2_with_1_item.json b/tests/samples/rss_2_with_1_item.json deleted file mode 100644 index d78c35c..0000000 --- a/tests/samples/rss_2_with_1_item.json +++ /dev/null @@ -1,133 +0,0 @@ -{ - "channel": { - "attributes": {}, - "content": { - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "cloud": null, - "copyright": { - "attributes": {}, - "content": "Copyright 2004 NotePage, Inc." - }, - "description": { - "attributes": {}, - "content": "RSS is a fascinating technology. The uses for RSS are expanding daily. Take a closer look at how various industries are using the benefits of RSS in their businesses." - }, - "docs": { - "attributes": {}, - "content": "http://blogs.law.harvard.edu/tech/rss" - }, - "generator": { - "attributes": {}, - "content": "FeedForAll Beta1 (0.0.1.8)" - }, - "image": { - "attributes": {}, - "content": { - "description": { - "attributes": {}, - "content": "FeedForAll Sample Feed" - }, - "height": { - "attributes": {}, - "content": 48 - }, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/industry-solutions.htm" - }, - "title": { - "attributes": {}, - "content": "FeedForAll Sample Feed" - }, - "url": { - "attributes": {}, - "content": "http://www.feedforall.com/ffalogo48x48.gif" - }, - "width": { - "attributes": {}, - "content": 48 - } - } - }, - "items": [ - { - "attributes": {}, - "content": { - "author": null, - "category": { - "attributes": { - "domain": "www.dmoz.com" - }, - "content": "Computers/Software/Internet/Site Management/Content Management" - }, - "comments": { - "attributes": {}, - "content": "http://www.feedforall.com/forum" - }, - "description": { - "attributes": {}, - "content": "FeedForAll helps Restaurant's communicate with customers. Let your customers know the latest specials or events.
\n
\nRSS feed uses include:
\nDaily Specials
\nEntertainment
\nCalendar of Events
" - }, - "enclosure": null, - "guid": null, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/restaurant.htm" - }, - "pub_date": { - "attributes": {}, - "content": "Tue, 19 Oct 2004 11:09:11 -0400" - }, - "source": null, - "title": { - "attributes": {}, - "content": "RSS Solutions for Restaurants" - } - } - } - ], - "language": { - "attributes": {}, - "content": "en-us" - }, - "last_build_date": { - "attributes": {}, - "content": "2004-10-19T13:39:14-04:00" - }, - "link": { - "attributes": {}, - "content": "http://www.feedforall.com/industry-solutions.htm" - }, - "managing_editor": { - "attributes": {}, - "content": "marketing@feedforall.com" - }, - "pub_date": { - "attributes": {}, - "content": "2004-10-19T13:38:55-04:00" - }, - "rating": null, - "skip_days": null, - "skip_hours": null, - "text_input": null, - "title": { - "attributes": {}, - "content": "FeedForAll Sample Feed" - }, - "ttl": null, - "web_master": { - "attributes": {}, - "content": "webmaster@feedforall.com" - } - } - }, - "version": { - "attributes": {}, - "content": "2.0" - } -} \ No newline at end of file diff --git a/tests/samples/rss_2_with_1_item.xml b/tests/samples/rss_2_with_1_item/data.xml similarity index 100% rename from tests/samples/rss_2_with_1_item.xml rename to tests/samples/rss_2_with_1_item/data.xml diff --git a/tests/samples/rss_2_with_1_item/result.pkl b/tests/samples/rss_2_with_1_item/result.pkl new file mode 100644 index 0000000..2d3b0ce Binary files /dev/null and b/tests/samples/rss_2_with_1_item/result.pkl differ diff --git a/tests/test_parsing.py b/tests/test_parsing.py index dcbbee2..8d7ea96 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -1,84 +1,51 @@ -from json import dumps +import logging +from typing import Type import pytest -from rss_parser import AtomParser, RSSParser - - -@pytest.mark.parametrize( - "sample_and_result", - [ - ["rss_2"], - ["rss_2_no_category_attr"], - ["apology_line"], - ["rss_2_with_1_item"], - ], - indirect=True, -) -def test_parses_all_rss_samples(sample_and_result): - sample, result = sample_and_result - rss = RSSParser.parse(sample) - - assert rss - - left = rss.json(indent=2, sort_keys=True) - right = dumps(result, indent=2, sort_keys=True, default=str) - - assert left == right - - -@pytest.mark.parametrize( - "sample_and_result", [["rss_2", True], ["rss_2_no_category_attr", True], ["apology_line", True]], indirect=True -) -def test_json_plain_ignores_attributes(sample_and_result): - # Expect basic RSSv2 to be parsed - sample, result = sample_and_result - rss = RSSParser.parse(sample) - - assert rss - - left = rss.json_plain(indent=2, sort_keys=True) - right = dumps(result, indent=2, sort_keys=True, default=str) - - assert left == right - - -@pytest.mark.parametrize( - "sample_and_result", - [ - ["atom"], - ["generic_atom_feed"], - ], - indirect=True, -) -def test_parses_all_atom_samples(sample_and_result): - sample, result = sample_and_result - atom = AtomParser.parse(sample) - - assert atom - - left = atom.json(indent=2, sort_keys=True) - right = dumps(result, indent=2, sort_keys=True, default=str) - - assert left == right - - -@pytest.mark.parametrize( - "sample_and_result", - [ - ["atom", True], - ["generic_atom_feed", True], - ], - indirect=True, -) -def test_json_plain_ignores_attributes_atom(sample_and_result): - # Expect basic RSSv2 to be parsed - sample, result = sample_and_result - rss = AtomParser.parse(sample) - - assert rss - - left = rss.json_plain(indent=2, sort_keys=True) - right = dumps(result, indent=2, sort_keys=True, default=str) - - assert left == right +from rss_parser import AtomParser, BaseParser, RSSParser + +logger = logging.getLogger(__name__) + + +class DataHelper: + @staticmethod + def compare_parsing(sample_and_result, parser: Type[BaseParser]): + sample, result = sample_and_result + rss = parser.parse(sample) + + assert rss + + parsed = rss.dict() + assert parsed == result + + +@pytest.mark.usefixtures("sample_and_result") +class TestRSS: + @pytest.mark.parametrize( + "sample_and_result", + [ + ["rss_2"], + ["rss_2_no_category_attr"], + ["apology_line"], + ["rss_2_with_1_item"], + ["github-49"], + ], + indirect=True, + ) + def test_parses_all_rss_samples(self, sample_and_result): + DataHelper.compare_parsing(sample_and_result, parser=RSSParser) + + +@pytest.mark.usefixtures("sample_and_result") +class TestAtom: + @pytest.mark.parametrize( + "sample_and_result", + [ + ["atom"], + ["generic_atom_feed"], + ], + indirect=True, + ) + def test_parses_all_atom_samples(self, sample_and_result): + DataHelper.compare_parsing(sample_and_result, parser=AtomParser)