mate-academy · OttoLindholm · Jan 23, 2025 · Jan 23, 2025 · mateacademy-ai-mentor · Jan 23, 2025
diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,6 @@
 *.iml
 .env
 .DS_Store
-venv/
+.venv/
 .pytest_cache/
 **__pycache__/
diff --git a/app/parse.py b/app/parse.py
@@ -1,4 +1,11 @@
-from dataclasses import dataclass
+import csv
+from dataclasses import dataclass, fields, astuple
+from urllib.parse import urljoin
+
+import requests
+from bs4 import BeautifulSoup, Tag
+
+
+# Root URL of the scraped site: main() downloads the first page from it and
+# parse_qutes() resolves each page's "next" link against it.
+BASE_URL = "https://quotes.toscrape.com/"
 
 
 @dataclass
@@ -8,8 +15,37 @@ class Quote:
     tags: list[str]
 
 
+# Header row for the output CSV: the names of Quote's dataclass fields.
+QUOTE_FIELDS = [quote_field.name for quote_field in fields(Quote)]
+
+
+def parse_single_quote(quote: Tag) -> Quote:
+    """Build a ``Quote`` from one ``.quote`` element.
+
+    The text and author are the text content of the first ``.text`` and
+    ``.author`` descendants; the tags are the text of every ``.tag``
+    descendant, in the order ``select`` returns them.
+
+    Raises ``AttributeError`` if the element has no ``.text`` or ``.author``
+    descendant, because ``select_one`` then returns ``None``.
+    """
+    quote_text = quote.select_one(".text").text
+    author_name = quote.select_one(".author").text
+    tag_names = [tag_element.text for tag_element in quote.select(".tag")]
+    return Quote(text=quote_text, author=author_name, tags=tag_names)
+
+
+def parse_qutes(soup: Tag, timeout: float = 10.0) -> list[Quote]:
+    """Collect the quotes from ``soup`` and from every page that follows it.
+
+    Each page's ``.quote`` elements are parsed with ``parse_single_quote``;
+    the page's ``.next > a`` link is then followed until a page has none.
+    (The misspelled name is kept so existing callers continue to work.)
+
+    Args:
+        soup: Parsed first page to start from.
+        timeout: Timeout in seconds passed to each ``requests.get`` call.
+
+    Returns:
+        The quotes of all visited pages, in the order they were encountered.
+
+    Raises:
+        requests.RequestException: If a follow-up page cannot be downloaded
+            (connection error, timeout, or an HTTP error status).
+    """
+    quotes: list[Quote] = []
+    while True:
+        quotes.extend(
+            parse_single_quote(quote) for quote in soup.select(".quote")
+        )
+        next_link = soup.select_one(".next > a")
+        if next_link is None:
+            break
+        # urljoin handles relative and absolute hrefs alike and avoids the
+        # doubled slash that plain concatenation gives when the href starts
+        # with "/" (BASE_URL already ends with one).
+        page_url = urljoin(BASE_URL, next_link["href"])
+        response = requests.get(page_url, timeout=timeout)
+        # Fail loudly instead of parsing an error page as "no more quotes".
+        response.raise_for_status()
+        soup = BeautifulSoup(response.content, "html.parser")
+    return quotes
+
+
 def main(output_csv_path: str) -> None:
-    pass
+    """Scrape all quotes starting at ``BASE_URL`` and save them as CSV.
+
+    The file gets a header row of ``QUOTE_FIELDS`` followed by one row per
+    quote. It is opened as UTF-8 with ``newline=""`` so that the ``csv``
+    writer controls the line endings itself.
+
+    Args:
+        output_csv_path: Path of the CSV file to create or overwrite.
+
+    Raises:
+        requests.RequestException: If the first page cannot be downloaded
+            (connection error, timeout, or an HTTP error status).
+    """
+    # A timeout keeps a stalled connection from hanging the script forever.
+    response = requests.get(BASE_URL, timeout=10)
+    # Without this check an error page would yield a header-only CSV.
+    response.raise_for_status()
+    quotes = parse_qutes(BeautifulSoup(response.content, "html.parser"))
+
+    with open(output_csv_path, "w", encoding="utf-8", newline="") as f:
+        writer = csv.writer(f)
+        writer.writerow(QUOTE_FIELDS)
+        writer.writerows(astuple(quote) for quote in quotes)
 
 
 if __name__ == "__main__":

diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,23 @@
+attrs==24.3.0
+beautifulsoup4==4.12.3
+certifi==2024.12.14
+charset-normalizer==3.4.1
+colorama==0.4.6
 flake8==5.0.4
 flake8-annotations==2.9.1
 flake8-quotes==3.3.1
 flake8-variables-names==0.0.5
+idna==3.10
+iniconfig==2.0.0
+mccabe==0.7.0
+packaging==24.2
 pep8-naming==0.13.2
+pluggy==1.5.0
+py==1.11.0
+pycodestyle==2.9.1
+pyflakes==2.5.0
 pytest==7.1.3
+requests==2.32.3
+soupsieve==2.6
+tomli==2.2.1
+urllib3==2.3.0