build: Upgrade Django. chore: Use ruff, uv.
jpmckinney committed Sep 16, 2024
1 parent 1fe12c6 commit 5fdd7d4
Showing 14 changed files with 124 additions and 76 deletions.
23 changes: 12 additions & 11 deletions .pre-commit-config.yaml
@@ -1,16 +1,17 @@
ci:
autoupdate_schedule: quarterly
skip: [pip-compile]
default_language_version:
python: python3.10
repos:
- repo: https://github.com/psf/black
rev: 24.4.2
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.3
hooks:
- id: black
- repo: https://github.com/pycqa/flake8
rev: 7.1.0
- id: ruff
- id: ruff-format
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.4.4
hooks:
- id: flake8
additional_dependencies: [flake8-comprehensions]
- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
- id: pip-compile
name: pip-compile requirements.in
args: [requirements.in, -o, requirements.txt]
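
Note on the new hooks: ruff and ruff-format take over the linting and formatting previously split across black, flake8 (with flake8-comprehensions), and isort, while uv's pip-compile hook re-pins requirements.txt from requirements.in; the skip: [pip-compile] entry excludes that hook on pre-commit.ci, presumably because it needs network access. Run locally, the hook is equivalent to "uv pip compile requirements.in -o requirements.txt", the same command recorded in the header of the regenerated requirements.txt below.
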
1 change: 0 additions & 1 deletion ca_bc_coquitlam/people.py
@@ -7,7 +7,6 @@


class CoquitlamPersonScraper(CanadianScraper):

def scrape(self):
def build_email(script):
w = re.findall(r'w = "(.*?)"', script)[0]
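
A minimal, runnable sketch of the idiom build_email() relies on above, pulling quoted variables out of an inline script block with re.findall; the sample script text, the second variable, and the final address assembly are invented for illustration, not taken from the scraper:

import re

# Invented stand-in for the <script> text the scraper downloads.
script = 'var w = "mayor"; var x = "coquitlam.ca";'

# Same pattern as in build_email(): capture the value assigned to w.
w = re.findall(r'w = "(.*?)"', script)[0]
x = re.findall(r'x = "(.*?)"', script)[0]  # assumed second variable

print(w + "@" + x)  # mayor@coquitlam.ca (the assembly step is an assumption)
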
1 change: 0 additions & 1 deletion ca_bc_surrey/people.py
@@ -12,7 +12,6 @@ def scrape(self):
assert len(members), "No members found"
seat_number = 1
for member in members:

role, name = member.xpath('.//a[@class="teaser__link"]/h4')[0].text_content().split(" ", 1)
district = "Surrey (seat {})".format(seat_number)
seat_number += 1
10 changes: 4 additions & 6 deletions ca_nl/people.py
@@ -1,9 +1,8 @@
import json
import re

from utils import CUSTOM_USER_AGENT
from utils import CUSTOM_USER_AGENT, CanadianScraper
from utils import CanadianPerson as Person
from utils import CanadianScraper

COUNCIL_PAGE = "https://www.assembly.nl.ca/js/members-index.js"

@@ -26,9 +25,7 @@ def scrape(self):
page = self.get(COUNCIL_PAGE)
members = re.search(
r"members = (\[(.+)\]);", page.content.decode().replace("<em>[Member-elect]</em>", ""), re.DOTALL
).groups()[
0
] # extract javascript array
).groups()[0] # extract javascript array
members = re.sub("<!--.+?-->", "", members) # remove comments
members = re.sub("<a.+?>", "", members).replace("</a>", "") # tags
members = members.replace('"', r"\"") # escape double quotes
@@ -60,7 +57,8 @@ def scrape(self):
)
if member.get("email"):
p.add_contact(
"email", member["email"].replace("@[email protected]", "@gov.nl.ca") # seriously guys?!
"email",
member["email"].replace("@[email protected]", "@gov.nl.ca"), # seriously guys?!
)

p.add_source(COUNCIL_PAGE)
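
For context, the single-line .groups()[0] is just the formatter collapsing the old three-line slice; the extraction pipeline is unchanged. A rough, self-contained sketch of the visible steps, with invented input (the quote-escaping and the final parse are not shown in this hunk, so the json.loads call below is an assumption):

import json
import re

# Invented stand-in for the members-index.js payload.
js = 'members = [{"name": "<a href="#">Jane Doe</a>", "district": "Example District"}<!-- vacant seat omitted -->];'

members = re.search(r"members = (\[(.+)\]);", js, re.DOTALL).groups()[0]  # extract javascript array
members = re.sub("<!--.+?-->", "", members)  # remove comments
members = re.sub("<a.+?>", "", members).replace("</a>", "")  # strip anchor tags
print(json.loads(members)[0]["name"])  # Jane Doe
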
3 changes: 1 addition & 2 deletions ca_ns_cape_breton/people.py
@@ -1,9 +1,8 @@
import html
import re

from utils import CUSTOM_USER_AGENT
from utils import CUSTOM_USER_AGENT, CanadianScraper
from utils import CanadianPerson as Person
from utils import CanadianScraper

COUNCIL_PAGE = "http://www.cbrm.ns.ca/mayor-council-2.html"
MAYOR_PAGE = "http://www.cbrm.ns.ca/mayor"
3 changes: 1 addition & 2 deletions ca_qc_cote_saint_luc/people.py
@@ -1,6 +1,5 @@
from utils import CUSTOM_USER_AGENT
from utils import CUSTOM_USER_AGENT, CanadianScraper
from utils import CanadianPerson as Person
from utils import CanadianScraper

COUNCIL_PAGE = "https://cotesaintluc.org/fr/affaires-municipales/membres-du-conseil/"

6 changes: 3 additions & 3 deletions patch.py
@@ -27,9 +27,9 @@
(r"\A1 \d{3} \d{3}-\d{4}(?: x\d+)?\Z", lambda x: x["type"] in ("text", "voice", "fax", "cell", "video", "pager")),
]
# Validate the format of contact_details[].note.
_contact_details["items"]["properties"]["note"][
"pattern"
] = r"\A(?:constituency|legislature|office|residence|)(?: \(\d\))?\Z"
_contact_details["items"]["properties"]["note"]["pattern"] = (
r"\A(?:constituency|legislature|office|residence|)(?: \(\d\))?\Z"
)
# contact_details[] must not include unexpected properties.
_contact_details["items"]["additionalProperties"] = False

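
The reformatting above only moves the subscript onto one line; the pattern itself is unchanged. For reference, a quick check of what the note pattern accepts (an optional location label, optionally followed by a numbered suffix such as " (1)", or an empty string):

import re

NOTE_PATTERN = r"\A(?:constituency|legislature|office|residence|)(?: \(\d\))?\Z"

for note in ("constituency", "legislature (2)", "", "office (1)", "headquarters"):
    print(repr(note), bool(re.search(NOTE_PATTERN, note)))
# Only "headquarters" fails to match; the empty string is allowed by the
# trailing empty alternative.
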
13 changes: 9 additions & 4 deletions pyproject.toml
@@ -1,6 +1,11 @@
[tool.black]
[project]
name = "scrapers_ca"
version = "0.0.1"

[tool.ruff]
line-length = 119
target-version = "py310"

[tool.isort]
profile = 'black'
line_length = 119
[tool.ruff.lint]
select = ["C4", "E", "F", "I", "W"]
ignore = ["E501"]
11 changes: 11 additions & 0 deletions requirements.in
@@ -0,0 +1,11 @@
# 0.9.0 uses jsonschema instead of validictory, so we use a commit after 0.8.0 that adds Django 2.0 support.
git+https://github.com/opencivicdata/pupa@f0791f7de07574039eff10d804e4683399a16ec5
agate
agate-excel
django<5
invoke
lxml
opencivicdata
regex
requests[security]
unidecode
96 changes: 82 additions & 14 deletions requirements.txt
@@ -1,15 +1,83 @@
# 0.9.0 uses jsonschema instead of validictory, so we use a commit after 0.8.0 that adds Django 2.0 support.
-e git+https://github.com/opencivicdata/pupa.git@f0791f7de07574039eff10d804e4683399a16ec5#egg=pupa
opencivicdata==3.3.1
Django==2.2.28

# Scrapers
agate
agate-excel
lxml==4.9.1
regex==2014.04.10
requests[security]==2.32.0

# Maintenance
# This file was autogenerated by uv via the following command:
# uv pip compile requirements.in -o requirements.txt
agate==1.12.0
# via
# -r requirements.in
# agate-excel
agate-excel==0.4.1
# via -r requirements.in
asgiref==3.8.1
# via django
babel==2.16.0
# via agate
certifi==2024.8.30
# via requests
charset-normalizer==3.3.2
# via requests
dj-database-url==0.3.0
# via pupa
django==4.2.16
# via
# -r requirements.in
# opencivicdata
# pupa
et-xmlfile==1.1.0
# via openpyxl
idna==3.10
# via requests
invoke==0.11.1
Unidecode==0.04.14
# via -r requirements.in
isodate==0.6.1
# via agate
leather==0.4.0
# via agate
lxml==4.9.1
# via -r requirements.in
olefile==0.47
# via agate-excel
opencivicdata==3.3.1
# via
# -r requirements.in
# pupa
openpyxl==3.1.5
# via agate-excel
parsedatetime==2.6
# via agate
psycopg2==2.9.9
# via pupa
psycopg2-binary==2.9.9
# via opencivicdata
pupa @ git+https://github.com/opencivicdata/pupa@f0791f7de07574039eff10d804e4683399a16ec5
# via -r requirements.in
python-slugify==8.0.4
# via agate
pytimeparse==1.1.8
# via agate
pytz==2024.2
# via pupa
regex==2014.4.10
# via -r requirements.in
requests==2.32.3
# via
# -r requirements.in
# scrapelib
scrapelib==2.3.0
# via pupa
six==1.16.0
# via isodate
sqlparse==0.5.1
# via django
text-unidecode==1.3
# via python-slugify
typing-extensions==4.12.2
# via asgiref
unidecode==0.4.14
# via -r requirements.in
urllib3==1.26.20
# via
# requests
# scrapelib
validictory==1.1.3
# via pupa
xlrd==2.0.1
# via agate-excel
2 changes: 0 additions & 2 deletions setup.cfg

This file was deleted.

25 changes: 0 additions & 25 deletions setup.py

This file was deleted.

5 changes: 0 additions & 5 deletions tox.ini

This file was deleted.

1 change: 1 addition & 0 deletions utils.py
@@ -256,6 +256,7 @@ class CSVScraper(CanadianScraper):
"""
Set the CSV file's delimiter.
"""

delimiter = ","
"""
Set the CSV file's encoding, like 'windows-1252' ('utf-8' by default).
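
The added blank line here is formatter output; delimiter and the encoding attribute documented by these docstrings are class-level settings meant to be overridden. A hypothetical subclass sketch (the csv_url attribute name and the values are assumptions for illustration, not taken from this commit):

from utils import CSVScraper


class ExampleCSVScraper(CSVScraper):
    csv_url = "https://example.org/elected-officials.csv"  # assumed attribute name
    delimiter = ";"  # semicolon-delimited source file
    encoding = "windows-1252"  # per the docstring's example encoding
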
