Skip to content

Commit

Permalink
Fix getting emails protected by cloudflare
Browse files Browse the repository at this point in the history
  • Loading branch information
rafe-murray committed Jun 6, 2024
1 parent dcaab20 commit 5a67f28
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions ca_qc_sainte_anne_de_bellevue/people.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,20 @@

class SainteAnneDeBellevuePersonScraper(CanadianScraper):
def scrape(self):
def decode_email(e):
    """Decode a Cloudflare "email-protection" hex payload into an address.

    The payload is a hex string: the first two hex digits are a one-byte
    XOR key, and every following pair of hex digits is one byte of the
    address XORed with that key.

    Args:
        e: The hex payload (the part of the protected link after ``#``).

    Returns:
        The decoded address as ``str``. An input holding only the key
        yields an empty string.

    Raises:
        ValueError: If the key or any byte pair is not valid hexadecimal.
    """
    key = int(e[:2], 16)

    # The ``len(e) - 1`` bound deliberately ignores a dangling trailing
    # nibble when the payload has an odd number of hex digits.
    raw = bytes(int(e[i : i + 2], 16) ^ key for i in range(2, len(e) - 1, 2))

    # Cloudflare's own decoder interprets the decoded bytes as UTF-8, so do
    # the same to keep non-ASCII addresses intact. If the bytes are not valid
    # UTF-8, fall back to one character per byte (the previous behaviour).
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        return raw.decode("latin-1")

page = self.lxmlize(COUNCIL_PAGE)

councillors = page.xpath('//div[@class="block text"]')
assert len(councillors), "No councillors found"
for i, councillor in enumerate(councillors):
for councillor in councillors:
name = councillor.xpath('.//div[@class="content-writable"]//strong/text()')[0]
district = councillor.xpath(".//h2/text()")[0]

Expand All @@ -23,9 +32,11 @@ def scrape(self):
district = "District {}".format(re.search(r"\d+", district)[0])
role = "Conseiller"

encoded_email = councillor.xpath('.//@href[contains(., "email-protection")]')[0].split("#")[1]

p = Person(primary_org="legislature", name=name, district=district, role=role)
p.add_source(COUNCIL_PAGE)

p.image = councillor.xpath(".//@src")[0]
p.add_contact("email", self.get_email(councillor))
p.add_contact("email", decode_email(encoded_email))
yield p

0 comments on commit 5a67f28

Please sign in to comment.