# Patch summary (commentary only; text before the first "diff --git" line is
# ignored by patch tools).
#
# Reworks SainteAnneDeBellevuePersonScraper.scrape in
# ca_qc_sainte_anne_de_bellevue/people.py:
#   * Drops the module-level `import re`; the new code no longer uses a regex.
#   * Selects one <p> per councillor -- any <p> with a child <a> whose href
#     contains "@" -- instead of every <div class="block text">.
#   * Reads the role from the text of the nearest preceding sibling <h2>.
#     The mayor is now detected by exact equality with "Maire" (previously a
#     substring test); for everyone else the district number is the third
#     whitespace-separated word of that heading (previously the first run of
#     digits found by regex).
#   * Builds the name from the first two whitespace-separated words of the
#     paragraph text, and the email from the fourth word with any "Président"
#     substring removed.
#   * Adds the email via p.add_contact directly; no longer sets p.image and no
#     longer calls self.get_email.
#
# NOTE(review): `role == "Maire"` is an exact match. If the heading text ever
#   carries surrounding whitespace or extra words, the mayor falls into the
#   else branch, where `role.split()[2]` raises IndexError for headings of
#   fewer than three words -- confirm against the live page.
# NOTE(review): name and email are taken purely by position (words 0-1 and
#   word 3). A name with more or fewer than two words would shift the email
#   index -- presumably every entry follows the same layout; verify.
# NOTE(review): the loop variable `councillor` is rebound from the element to
#   a list of strings part-way through the loop body; a separate name would
#   read more clearly.

diff --git a/ca_qc_sainte_anne_de_bellevue/people.py b/ca_qc_sainte_anne_de_bellevue/people.py
index 30d4662b..376a0baf 100644
--- a/ca_qc_sainte_anne_de_bellevue/people.py
+++ b/ca_qc_sainte_anne_de_bellevue/people.py
@@ -1,5 +1,3 @@
-import re
-
 from utils import CanadianPerson as Person
 from utils import CanadianScraper
 
@@ -10,22 +8,26 @@ class SainteAnneDeBellevuePersonScraper(CanadianScraper):
     def scrape(self):
         page = self.lxmlize(COUNCIL_PAGE)
 
-        councillors = page.xpath('//div[@class="block text"]')
+        councillors = page.xpath('//p[a[contains(@href, "@")]]')
         assert len(councillors), "No councillors found"
+
         for councillor in councillors:
-            name = councillor.xpath('.//div[@class="content-writable"]//strong/text()')[0]
-            district = councillor.xpath(".//h2/text()")[0]
+            role = councillor.xpath("./preceding-sibling::h2[1]/text()")[0]
 
-            if "Maire" in district:
+            if role == "Maire":
                 district = "Sainte-Anne-de-Bellevue"
-                role = "Maire"
             else:
-                district = "District {}".format(re.search(r"\d+", district)[0])
+                district = "District " + role.split()[2]
                 role = "Conseiller"
 
+            councillor = councillor.text_content().split()
+
+            name = " ".join(councillor[:2])
+            email = councillor[3]
+            email = email.replace("Président", "")
+
             p = Person(primary_org="legislature", name=name, district=district, role=role)
             p.add_source(COUNCIL_PAGE)
+            p.add_contact("email", email)
 
-            p.image = councillor.xpath(".//@src")[0]
-            p.add_contact("email", self.get_email(councillor))
             yield p