Skip to content

Commit

Permalink
Update Abbotsford scraper
Browse files (browse the repository at this point in the history)
  • Loading branch information
rafe-murray authored and jpmckinney committed May 14, 2024
1 parent 68eac63 commit 1456deb
Showing 1 changed file with 19 additions and 11 deletions.
30 changes: 19 additions & 11 deletions ca_bc_abbotsford/people.py
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
from utils import CanadianPerson as Person
from utils import CanadianScraper

COUNCIL_PAGE = "http://www.abbotsford.ca/city_hall/mayor_and_council/city_council.htm"
CONTACT_PAGE = "http://www.abbotsford.ca/contact_us.htm"
COUNCIL_PAGE = "https://www.abbotsford.ca/council/city-council"
CONTACT_PAGE = "https://www.abbotsford.ca/city-hall/contact-us"


class AbbotsfordPersonScraper(CanadianScraper):
Expand All @@ -11,15 +11,17 @@ def scrape(self):

coun_page = self.lxmlize(COUNCIL_PAGE)
contact_page = self.lxmlize(CONTACT_PAGE)
councillors = coun_page.xpath('//div[@id="main-content"]//h3')
contact_data = contact_page.xpath(
'//p[contains(./strong/text(), "Mayor & Council")]/following-sibling::table[1]//tr'
)[2:]
councillors = coun_page.xpath(
'//div[@id="block-views-block-council-members-block-1"]//div[@class="views-row"]'
)
contact_data = contact_page.xpath('//caption[contains(./h3/text(), "Council")]/following-sibling::tbody//tr')[
:-1
]

assert len(councillors), "No councillors found"
assert len(councillors) == len(contact_data), "Expected {}, got {}".format(len(councillors), len(contact_data))
for councillor, contact in zip(councillors, contact_data):
text = councillor.text_content()
text = councillor.xpath(".//h3/a")[0].text_content()
if text.startswith("Councill"):
role = "Councillor"
district = "Abbotsford (seat {})".format(councillor_seat_number)
Expand All @@ -28,17 +30,23 @@ def scrape(self):
role = "Mayor"
district = "Abbotsford"
name = text.split(" ", 1)[1]
image = councillor.xpath("./img/@src")
phone = contact.xpath("./td[2]/text()")[0]
fax = contact.xpath("./td[3]/text()")[0]
image = councillor.xpath(".//img/@src")
email = self.get_email(contact)
address = contact.xpath("./td[2]//a/text()")[0]
phone = contact.xpath("./td[2]/div[contains(., 'Phone')]//@href")[0].split(":", 1)[1]
fax_div = contact.xpath("./td[2]/div[contains(., 'Fax')]//@href")

p = Person(primary_org="legislature", name=name, district=district, role=role)
p.add_source(COUNCIL_PAGE)
p.add_source(CONTACT_PAGE)

if image:
p.image = image[0]
if fax_div:
fax = fax_div[0].split(":", 1)[1]
p.add_contact("fax", fax, "legislature")
p.add_contact("voice", phone, "legislature")
p.add_contact("fax", fax, "legislature")
p.add_contact("address", address, "legislature")
p.add_contact("email", email)

yield p

0 comments on commit 1456deb

Please sign in to comment.