build: Upgrade Django. chore: Use ruff, uv.
jpmckinney committed Sep 16, 2024
1 parent 1fe12c6 commit 5fdd7d4
Showing 14 changed files with 124 additions and 76 deletions.
23 changes: 12 additions & 11 deletions .pre-commit-config.yaml
@@ -1,16 +1,17 @@
ci:
autoupdate_schedule: quarterly
skip: [pip-compile]
default_language_version:
python: python3.10
repos:
- repo: https://github.com/psf/black
rev: 24.4.2
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.3
hooks:
- id: black
- repo: https://github.com/pycqa/flake8
rev: 7.1.0
- id: ruff
- id: ruff-format
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.4.4
hooks:
- id: flake8
additional_dependencies: [flake8-comprehensions]
- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
- id: pip-compile
name: pip-compile requirements.in
args: [requirements.in, -o, requirements.txt]
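
Note on the new hooks: ruff and ruff-format take over the linting and formatting previously split across black, flake8 (with flake8-comprehensions), and isort, while uv's pip-compile hook re-pins requirements.txt from requirements.in; the skip: [pip-compile] entry excludes that hook on pre-commit.ci, presumably because it needs network access. Run locally, the hook is equivalent to "uv pip compile requirements.in -o requirements.txt", the same command recorded in the header of the regenerated requirements.txt below.
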
1 change: 0 additions & 1 deletion ca_bc_coquitlam/people.py
@@ -7,7 +7,6 @@


class CoquitlamPersonScraper(CanadianScraper):

def scrape(self):
def build_email(script):
w = re.findall(r'w = "(.*?)"', script)[0]
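
A minimal, runnable sketch of the idiom build_email() relies on above, pulling quoted variables out of an inline script block with re.findall; the sample script text, the second variable, and the final address assembly are invented for illustration, not taken from the scraper:

import re

# Invented stand-in for the <script> text the scraper downloads.
script = 'var w = "mayor"; var x = "coquitlam.ca";'

# Same pattern as in build_email(): capture the value assigned to w.
w = re.findall(r'w = "(.*?)"', script)[0]
x = re.findall(r'x = "(.*?)"', script)[0]  # assumed second variable

print(w + "@" + x)  # mayor@coquitlam.ca (the assembly step is an assumption)
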
1 change: 0 additions & 1 deletion ca_bc_surrey/people.py
@@ -12,7 +12,6 @@ def scrape(self):
assert len(members), "No members found"
seat_number = 1
for member in members:

role, name = member.xpath('.//a[@class="teaser__link"]/h4')[0].text_content().split(" ", 1)
district = "Surrey (seat {})".format(seat_number)
seat_number += 1
10 changes: 4 additions & 6 deletions ca_nl/people.py
@@ -1,9 +1,8 @@
import json
import re

from utils import CUSTOM_USER_AGENT
from utils import CUSTOM_USER_AGENT, CanadianScraper
from utils import CanadianPerson as Person
from utils import CanadianScraper

COUNCIL_PAGE = "https://www.assembly.nl.ca/js/members-index.js"

@@ -26,9 +25,7 @@ def scrape(self):
page = self.get(COUNCIL_PAGE)
members = re.search(
r"members = (\[(.+)\]);", page.content.decode().replace("<em>[Member-elect]</em>", ""), re.DOTALL
).groups()[
0
] # extract javascript array
).groups()[0] # extract javascript array
members = re.sub("<!--.+?-->", "", members) # remove comments
members = re.sub("<a.+?>", "", members).replace("</a>", "") # tags
members = members.replace('"', r"\"") # escape double quotes
@@ -60,7 +57,8 @@ def scrape(self):
)
if member.get("email"):
p.add_contact(
"email", member["email"].replace("@[email protected]", "@gov.nl.ca") # seriously guys?!
"email",
member["email"].replace("@[email protected]", "@gov.nl.ca"), # seriously guys?!
)

p.add_source(COUNCIL_PAGE)
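
For context, the single-line .groups()[0] is just the formatter collapsing the old three-line slice; the extraction pipeline is unchanged. A rough, self-contained sketch of the visible steps, with invented input (the quote-escaping and the final parse are not shown in this hunk, so the json.loads call below is an assumption):

import json
import re

# Invented stand-in for the members-index.js payload.
js = 'members = [{"name": "<a href="#">Jane Doe</a>", "district": "Example District"}<!-- vacant seat omitted -->];'

members = re.search(r"members = (\[(.+)\]);", js, re.DOTALL).groups()[0]  # extract javascript array
members = re.sub("<!--.+?-->", "", members)  # remove comments
members = re.sub("<a.+?>", "", members).replace("</a>", "")  # strip anchor tags
print(json.loads(members)[0]["name"])  # Jane Doe
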
3 changes: 1 addition & 2 deletions ca_ns_cape_breton/people.py
@@ -1,9 +1,8 @@
import html
import re

from utils import CUSTOM_USER_AGENT
from utils import CUSTOM_USER_AGENT, CanadianScraper
from utils import CanadianPerson as Person
from utils import CanadianScraper

COUNCIL_PAGE = "http://www.cbrm.ns.ca/mayor-council-2.html"
MAYOR_PAGE = "http://www.cbrm.ns.ca/mayor"
3 changes: 1 addition & 2 deletions ca_qc_cote_saint_luc/people.py
@@ -1,6 +1,5 @@
from utils import CUSTOM_USER_AGENT
from utils import CUSTOM_USER_AGENT, CanadianScraper
from utils import CanadianPerson as Person
from utils import CanadianScraper

COUNCIL_PAGE = "https://cotesaintluc.org/fr/affaires-municipales/membres-du-conseil/"

6 changes: 3 additions & 3 deletions patch.py
@@ -27,9 +27,9 @@
(r"\A1 \d{3} \d{3}-\d{4}(?: x\d+)?\Z", lambda x: x["type"] in ("text", "voice", "fax", "cell", "video", "pager")),
]
# Validate the format of contact_details[].note.
_contact_details["items"]["properties"]["note"][
"pattern"
] = r"\A(?:constituency|legislature|office|residence|)(?: \(\d\))?\Z"
_contact_details["items"]["properties"]["note"]["pattern"] = (
r"\A(?:constituency|legislature|office|residence|)(?: \(\d\))?\Z"
)
# contact_details[] must not include unexpected properties.
_contact_details["items"]["additionalProperties"] = False

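
The reformatting above only moves the subscript onto one line; the pattern itself is unchanged. For reference, a quick check of what the note pattern accepts (an optional location label, optionally followed by a numbered suffix such as " (1)", or an empty string):

import re

NOTE_PATTERN = r"\A(?:constituency|legislature|office|residence|)(?: \(\d\))?\Z"

for note in ("constituency", "legislature (2)", "", "office (1)", "headquarters"):
    print(repr(note), bool(re.search(NOTE_PATTERN, note)))
# Only "headquarters" fails to match; the empty string is allowed by the
# trailing empty alternative.
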
13 changes: 9 additions & 4 deletions pyproject.toml
@@ -1,6 +1,11 @@
[tool.black]
[project]
name = "scrapers_ca"
version = "0.0.1"

[tool.ruff]
line-length = 119
target-version = "py310"

[tool.isort]
profile = 'black'
line_length = 119
[tool.ruff.lint]
select = ["C4", "E", "F", "I", "W"]
ignore = ["E501"]
11 changes: 11 additions & 0 deletions requirements.in
@@ -0,0 +1,11 @@
# 0.9.0 uses jsonschema instead of validictory, so we use a commit after 0.8.0 that adds Django 2.0 support.
git+https://github.com/opencivicdata/pupa@f0791f7de07574039eff10d804e4683399a16ec5
agate
agate-excel
django<5
invoke
lxml
opencivicdata
regex
requests[security]
unidecode
96 changes: 82 additions & 14 deletions requirements.txt
@@ -1,15 +1,83 @@
# 0.9.0 uses jsonschema instead of validictory, so we use a commit after 0.8.0 that adds Django 2.0 support.
-e git+https://github.com/opencivicdata/pupa.git@f0791f7de07574039eff10d804e4683399a16ec5#egg=pupa
opencivicdata==3.3.1
Django==2.2.28

# Scrapers
agate
agate-excel
lxml==4.9.1
regex==2014.04.10
requests[security]==2.32.0

# Maintenance
# This file was autogenerated by uv via the following command:
# uv pip compile requirements.in -o requirements.txt
agate==1.12.0
# via
# -r requirements.in
# agate-excel
agate-excel==0.4.1
# via -r requirements.in
asgiref==3.8.1
# via django
babel==2.16.0
# via agate
certifi==2024.8.30
# via requests
charset-normalizer==3.3.2
# via requests
dj-database-url==0.3.0
# via pupa
django==4.2.16
# via
# -r requirements.in
# opencivicdata
# pupa
et-xmlfile==1.1.0
# via openpyxl
idna==3.10
# via requests
invoke==0.11.1
Unidecode==0.04.14
# via -r requirements.in
isodate==0.6.1
# via agate
leather==0.4.0
# via agate
lxml==4.9.1
# via -r requirements.in
olefile==0.47
# via agate-excel
opencivicdata==3.3.1
# via
# -r requirements.in
# pupa
openpyxl==3.1.5
# via agate-excel
parsedatetime==2.6
# via agate
psycopg2==2.9.9
# via pupa
psycopg2-binary==2.9.9
# via opencivicdata
pupa @ git+https://github.com/opencivicdata/pupa@f0791f7de07574039eff10d804e4683399a16ec5
# via -r requirements.in
python-slugify==8.0.4
# via agate
pytimeparse==1.1.8
# via agate
pytz==2024.2
# via pupa
regex==2014.4.10
# via -r requirements.in
requests==2.32.3
# via
# -r requirements.in
# scrapelib
scrapelib==2.3.0
# via pupa
six==1.16.0
# via isodate
sqlparse==0.5.1
# via django
text-unidecode==1.3
# via python-slugify
typing-extensions==4.12.2
# via asgiref
unidecode==0.4.14
# via -r requirements.in
urllib3==1.26.20
# via
# requests
# scrapelib
validictory==1.1.3
# via pupa
xlrd==2.0.1
# via agate-excel
2 changes: 0 additions & 2 deletions setup.cfg

This file was deleted.

25 changes: 0 additions & 25 deletions setup.py

This file was deleted.

5 changes: 0 additions & 5 deletions tox.ini

This file was deleted.

1 change: 1 addition & 0 deletions utils.py
@@ -256,6 +256,7 @@ class CSVScraper(CanadianScraper):
"""
Set the CSV file's delimiter.
"""

delimiter = ","
"""
Set the CSV file's encoding, like 'windows-1252' ('utf-8' by default).
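
The added blank line here is formatter output; delimiter and the encoding attribute documented by these docstrings are class-level settings meant to be overridden. A hypothetical subclass sketch (the csv_url attribute name and the values are assumptions for illustration, not taken from this commit):

from utils import CSVScraper


class ExampleCSVScraper(CSVScraper):
    csv_url = "https://example.org/elected-officials.csv"  # assumed attribute name
    delimiter = ";"  # semicolon-delimited source file
    encoding = "windows-1252"  # per the docstring's example encoding
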
