From 9047ecf1139498c26ab81e38654452e9d22d464b Mon Sep 17 00:00:00 2001 From: Sergiy Gorbachov Date: Thu, 23 Jan 2025 16:43:12 +0200 Subject: [PATCH 1/2] Solution --- .gitignore | 2 +- app/parse.py | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index b26d611..1be53e5 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,6 @@ *.iml .env .DS_Store -venv/ +.venv/ .pytest_cache/ **__pycache__/ diff --git a/app/parse.py b/app/parse.py index 8d15be0..a37748a 100644 --- a/app/parse.py +++ b/app/parse.py @@ -1,4 +1,11 @@ -from dataclasses import dataclass +import csv +from dataclasses import dataclass, fields, astuple + +import requests +from bs4 import BeautifulSoup, Tag + + +BASE_URL = "https://quotes.toscrape.com/" @dataclass @@ -8,8 +15,37 @@ class Quote: tags: list[str] +QUOTE_FIELDS = [field.name for field in fields(Quote)] + + +def parse_single_quote(quote: Tag) -> Quote: + return Quote( + text=quote.select_one(".text").text, + author=quote.select_one(".author").text, + tags=[tag.text for tag in quote.select(".tag")], + ) + + +def parse_qutes(soup: Tag) -> list[Quote]: + quotes = [parse_single_quote(quote) for quote in soup.select(".quote")] + while next := soup.select_one(".next > a"): + text = requests.get(BASE_URL + next["href"]).content + soup = BeautifulSoup(text, "html.parser") + quotes.extend( + [parse_single_quote(quote) for quote in soup.select(".quote")] + ) + return quotes + + def main(output_csv_path: str) -> None: - pass + text = requests.get(BASE_URL).content + soup = BeautifulSoup(text, "html.parser") + quotes = parse_qutes(soup) + + with open(output_csv_path, "w", encoding="utf-8", newline="") as f: + writer = csv.writer(f) + writer.writerow(QUOTE_FIELDS) + writer.writerows([astuple(quote) for quote in quotes]) if __name__ == "__main__": From 141107bea94a29334f99365591ff5c20f1e29371 Mon Sep 17 00:00:00 2001 From: Sergiy Gorbachov Date: Thu, 23 Jan 2025 16:47:14 +0200 Subject: [PATCH 2/2] Solution --- requirements.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/requirements.txt b/requirements.txt index 3f202d6..be3dc23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,23 @@ +attrs==24.3.0 +beautifulsoup4==4.12.3 +certifi==2024.12.14 +charset-normalizer==3.4.1 +colorama==0.4.6 flake8==5.0.4 flake8-annotations==2.9.1 flake8-quotes==3.3.1 flake8-variables-names==0.0.5 +idna==3.10 +iniconfig==2.0.0 +mccabe==0.7.0 +packaging==24.2 pep8-naming==0.13.2 +pluggy==1.5.0 +py==1.11.0 +pycodestyle==2.9.1 +pyflakes==2.5.0 pytest==7.1.3 +requests==2.32.3 +soupsieve==2.6 +tomli==2.2.1 +urllib3==2.3.0