mate-academy · xborismenx · Jan 18, 2025 · Jan 18, 2025 · Jan 18, 2025 · Jan 18, 2025
diff --git a/app/parse.py b/app/parse.py
@@ -1,4 +1,10 @@
+import csv
+import dataclasses
 from dataclasses import dataclass
+from urllib.parse import urljoin
+
+import requests
+from bs4 import BeautifulSoup, Tag, ResultSet
+
+# Start URL of the crawl and base URL for pagination links.
+HOME_PAGE_URL = "https://quotes.toscrape.com"
 
 
 @dataclass
@@ -8,8 +14,51 @@ class Quote:
     tags: list[str]
 
 
+def get_single_quote(single_quote: Tag) -> Quote:
+    """Build a ``Quote`` from one quote element of the page.
+
+    The text is taken from the first ``span`` with class ``text``, the
+    author from the first ``small`` with class ``author``, and the tags
+    from every ``a`` with class ``tag`` found inside the element.
+
+    Raises:
+        IndexError: If the element has no matching text or author node.
+    """
+    text_nodes = single_quote.find_all("span", class_="text")
+    author_nodes = single_quote.find_all("small", class_="author")
+    tag_links = single_quote.find_all("a", class_="tag")
+    return Quote(
+        text=text_nodes[0].text,
+        author=author_nodes[0].text,
+        tags=[link.text for link in tag_links],
+    )
+
+
+def page_with_quotes(soup_page: BeautifulSoup) -> ResultSet:
+    """Return every element of the page that matches the ``.quote`` selector."""
+    return soup_page.select(".quote")
+
+
+def next_page(page: BeautifulSoup) -> str | None:
+    """Return the absolute URL of the next page, or ``None`` if there is none.
+
+    Looks for an ``<a>`` with an ``href`` inside the ``li`` element of class
+    ``next`` and resolves that target against ``HOME_PAGE_URL``.
+    """
+    next_item = page.find("li", class_="next")
+    if not next_item:
+        return None
+    link = next_item.find("a")
+    if not link:
+        return None
+    href = link.get("href")
+    if href is None:
+        return None
+    # urljoin copes with relative and absolute hrefs alike, whereas plain
+    # string concatenation only works for root-relative ones.
+    return urljoin(HOME_PAGE_URL, href)
+
+
 def main(output_csv_path: str) -> None:
-    pass
+    """Scrape every page of quotes and write them to a CSV file.
+
+    Starting at ``HOME_PAGE_URL``, follows the "next" link page by page and
+    collects each quote. If a request fails, scraping stops and whatever was
+    collected so far is still written.
+
+    Args:
+        output_csv_path: Path of the CSV file to create or overwrite.
+    """
+    all_quotes = []
+    url = HOME_PAGE_URL
+    while url:
+        try:
+            # A timeout keeps a stalled connection from hanging the scraper
+            # forever; requests applies none by default.
+            response = requests.get(url, timeout=10)
+            # Surface 4xx/5xx answers instead of parsing an error page.
+            response.raise_for_status()
+        except requests.RequestException as e:
+            print(f"When parse got error {e}")
+            break
+
+        soup = BeautifulSoup(response.content, "html.parser")
+        all_quotes.extend(
+            dataclasses.asdict(get_single_quote(info))
+            for info in page_with_quotes(soup)
+        )
+        url = next_page(soup)
+
+    with open(output_csv_path, "w", newline="", encoding="utf-8") as csvfile:
+        fieldnames = [attr.name for attr in dataclasses.fields(Quote)]
+        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(all_quotes)
 
 
 if __name__ == "__main__":

diff --git a/requirements.txt b/requirements.txt