diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 47434be2..5e135c6f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,16 +1,17 @@
ci:
autoupdate_schedule: quarterly
+ skip: [pip-compile]
+default_language_version:
+ python: python3.10
repos:
- - repo: https://github.com/psf/black
- rev: 24.4.2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.6.3
hooks:
- - id: black
- - repo: https://github.com/pycqa/flake8
- rev: 7.1.0
+ - id: ruff
+ - id: ruff-format
+ - repo: https://github.com/astral-sh/uv-pre-commit
+ rev: 0.4.4
hooks:
- - id: flake8
- additional_dependencies: [flake8-comprehensions]
- - repo: https://github.com/pycqa/isort
- rev: 5.13.2
- hooks:
- - id: isort
+ - id: pip-compile
+ name: pip-compile requirements.in
+ args: [requirements.in, -o, requirements.txt]
diff --git a/ca_bc_coquitlam/people.py b/ca_bc_coquitlam/people.py
index 51e98652..e8147c56 100644
--- a/ca_bc_coquitlam/people.py
+++ b/ca_bc_coquitlam/people.py
@@ -7,7 +7,6 @@
class CoquitlamPersonScraper(CanadianScraper):
-
def scrape(self):
def build_email(script):
w = re.findall(r'w = "(.*?)"', script)[0]
diff --git a/ca_bc_surrey/people.py b/ca_bc_surrey/people.py
index b0240acd..f9654b73 100644
--- a/ca_bc_surrey/people.py
+++ b/ca_bc_surrey/people.py
@@ -12,7 +12,6 @@ def scrape(self):
assert len(members), "No members found"
seat_number = 1
for member in members:
-
role, name = member.xpath('.//a[@class="teaser__link"]/h4')[0].text_content().split(" ", 1)
district = "Surrey (seat {})".format(seat_number)
seat_number += 1
diff --git a/ca_nl/people.py b/ca_nl/people.py
index a75bec04..b9f38932 100644
--- a/ca_nl/people.py
+++ b/ca_nl/people.py
@@ -1,9 +1,8 @@
import json
import re
-from utils import CUSTOM_USER_AGENT
+from utils import CUSTOM_USER_AGENT, CanadianScraper
from utils import CanadianPerson as Person
-from utils import CanadianScraper
COUNCIL_PAGE = "https://www.assembly.nl.ca/js/members-index.js"
@@ -26,9 +25,7 @@ def scrape(self):
page = self.get(COUNCIL_PAGE)
members = re.search(
r"members = (\[(.+)\]);", page.content.decode().replace("[Member-elect]", ""), re.DOTALL
- ).groups()[
- 0
- ] # extract javascript array
+ ).groups()[0] # extract javascript array
members = re.sub("", "", members) # remove comments
members = re.sub("", "", members).replace("", "") # tags
members = members.replace('"', r"\"") # escape double quotes
@@ -60,7 +57,8 @@ def scrape(self):
)
if member.get("email"):
p.add_contact(
- "email", member["email"].replace("@gov.nl.ca@gov.nl.ca", "@gov.nl.ca") # seriously guys?!
+ "email",
+ member["email"].replace("@gov.nl.ca@gov.nl.ca", "@gov.nl.ca"), # seriously guys?!
)
p.add_source(COUNCIL_PAGE)
diff --git a/ca_ns_cape_breton/people.py b/ca_ns_cape_breton/people.py
index dad984d0..6774bd7e 100644
--- a/ca_ns_cape_breton/people.py
+++ b/ca_ns_cape_breton/people.py
@@ -1,9 +1,8 @@
import html
import re
-from utils import CUSTOM_USER_AGENT
+from utils import CUSTOM_USER_AGENT, CanadianScraper
from utils import CanadianPerson as Person
-from utils import CanadianScraper
COUNCIL_PAGE = "http://www.cbrm.ns.ca/mayor-council-2.html"
MAYOR_PAGE = "http://www.cbrm.ns.ca/mayor"
diff --git a/ca_qc_cote_saint_luc/people.py b/ca_qc_cote_saint_luc/people.py
index 56a1f225..9670ea1c 100644
--- a/ca_qc_cote_saint_luc/people.py
+++ b/ca_qc_cote_saint_luc/people.py
@@ -1,6 +1,5 @@
-from utils import CUSTOM_USER_AGENT
+from utils import CUSTOM_USER_AGENT, CanadianScraper
from utils import CanadianPerson as Person
-from utils import CanadianScraper
COUNCIL_PAGE = "https://cotesaintluc.org/fr/affaires-municipales/membres-du-conseil/"
diff --git a/patch.py b/patch.py
index 2d6482a0..8acf2c0c 100644
--- a/patch.py
+++ b/patch.py
@@ -27,9 +27,9 @@
(r"\A1 \d{3} \d{3}-\d{4}(?: x\d+)?\Z", lambda x: x["type"] in ("text", "voice", "fax", "cell", "video", "pager")),
]
# Validate the format of contact_details[].note.
-_contact_details["items"]["properties"]["note"][
- "pattern"
-] = r"\A(?:constituency|legislature|office|residence|)(?: \(\d\))?\Z"
+_contact_details["items"]["properties"]["note"]["pattern"] = (
+ r"\A(?:constituency|legislature|office|residence|)(?: \(\d\))?\Z"
+)
# contact_details[] must not include unexpected properties.
_contact_details["items"]["additionalProperties"] = False
diff --git a/pyproject.toml b/pyproject.toml
index 8656c702..059d331a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,11 @@
-[tool.black]
+[project]
+name = "scrapers_ca"
+version = "0.0.1"
+
+[tool.ruff]
line-length = 119
+target-version = "py310"
-[tool.isort]
-profile = 'black'
-line_length = 119
+[tool.ruff.lint]
+select = ["C4", "E", "F", "I", "W"]
+ignore = ["E501"]
diff --git a/requirements.in b/requirements.in
new file mode 100644
index 00000000..3cb35058
--- /dev/null
+++ b/requirements.in
@@ -0,0 +1,11 @@
+# pupa 0.9.0 uses jsonschema instead of validictory, so we pin a pupa commit after 0.8.0 that adds Django 2.0 support.
+git+https://github.com/opencivicdata/pupa@f0791f7de07574039eff10d804e4683399a16ec5
+agate
+agate-excel
+django<5
+invoke
+lxml
+opencivicdata
+regex
+requests[security]
+unidecode
diff --git a/requirements.txt b/requirements.txt
index c14d43a1..080af981 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,15 +1,83 @@
-# 0.9.0 uses jsonschema instead of validictory, so we use a commit after 0.8.0 that adds Django 2.0 support.
--e git+https://github.com/opencivicdata/pupa.git@f0791f7de07574039eff10d804e4683399a16ec5#egg=pupa
-opencivicdata==3.3.1
-Django==2.2.28
-
-# Scrapers
-agate
-agate-excel
-lxml==4.9.1
-regex==2014.04.10
-requests[security]==2.32.0
-
-# Maintenance
+# This file was autogenerated by uv via the following command:
+# uv pip compile requirements.in -o requirements.txt
+agate==1.12.0
+ # via
+ # -r requirements.in
+ # agate-excel
+agate-excel==0.4.1
+ # via -r requirements.in
+asgiref==3.8.1
+ # via django
+babel==2.16.0
+ # via agate
+certifi==2024.8.30
+ # via requests
+charset-normalizer==3.3.2
+ # via requests
+dj-database-url==0.3.0
+ # via pupa
+django==4.2.16
+ # via
+ # -r requirements.in
+ # opencivicdata
+ # pupa
+et-xmlfile==1.1.0
+ # via openpyxl
+idna==3.10
+ # via requests
invoke==0.11.1
-Unidecode==0.04.14
+ # via -r requirements.in
+isodate==0.6.1
+ # via agate
+leather==0.4.0
+ # via agate
+lxml==4.9.1
+ # via -r requirements.in
+olefile==0.47
+ # via agate-excel
+opencivicdata==3.3.1
+ # via
+ # -r requirements.in
+ # pupa
+openpyxl==3.1.5
+ # via agate-excel
+parsedatetime==2.6
+ # via agate
+psycopg2==2.9.9
+ # via pupa
+psycopg2-binary==2.9.9
+ # via opencivicdata
+pupa @ git+https://github.com/opencivicdata/pupa@f0791f7de07574039eff10d804e4683399a16ec5
+ # via -r requirements.in
+python-slugify==8.0.4
+ # via agate
+pytimeparse==1.1.8
+ # via agate
+pytz==2024.2
+ # via pupa
+regex==2014.4.10
+ # via -r requirements.in
+requests==2.32.3
+ # via
+ # -r requirements.in
+ # scrapelib
+scrapelib==2.3.0
+ # via pupa
+six==1.16.0
+ # via isodate
+sqlparse==0.5.1
+ # via django
+text-unidecode==1.3
+ # via python-slugify
+typing-extensions==4.12.2
+ # via asgiref
+unidecode==0.4.14
+ # via -r requirements.in
+urllib3==1.26.20
+ # via
+ # requests
+ # scrapelib
+validictory==1.1.3
+ # via pupa
+xlrd==2.0.1
+ # via agate-excel
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index cfb0df10..00000000
--- a/setup.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-[flake8]
-extend-ignore = E203,E501
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 15719185..00000000
--- a/setup.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# @see https://pythonhosted.org/an_example_pypi_project/setuptools.html
-# @see https://pythonhosted.org/setuptools/setuptools.html
-import os
-
-from setuptools import find_packages, setup
-
-
-def read(fname):
- return open(os.path.join(os.path.dirname(__file__), fname)).read()
-
-
-setup(
- name="scrapers_ca",
- version="0.0.1",
- author="Open North",
- author_email="info@opennorth.ca",
- description="Canadian legislative scrapers",
- license="MIT",
- url="https://github.com/opencivicdata/scrapers-ca",
- packages=find_packages(),
- long_description=read("README.md"),
- install_requires=[
- "lxml",
- ],
-)
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index 2c8b2524..00000000
--- a/tox.ini
+++ /dev/null
@@ -1,5 +0,0 @@
-[flake8]
-exclude=disabled
-ignore=E501,E731
-# E501 line too long (X > 79 characters)
-# E731 do not assign a lambda expression, use a def
diff --git a/utils.py b/utils.py
index 99fef627..030728bb 100644
--- a/utils.py
+++ b/utils.py
@@ -256,6 +256,7 @@ class CSVScraper(CanadianScraper):
"""
Set the CSV file's delimiter.
"""
+
delimiter = ","
"""
Set the CSV file's encoding, like 'windows-1252' ('utf-8' by default).