From 5a67f286920b4ad6cf2592067558f6abe5fdb87e Mon Sep 17 00:00:00 2001
From: Rafe Murray <rafe.murray@jmaconsulting.biz>
Date: Thu, 6 Jun 2024 09:36:21 -0400
Subject: [PATCH 1/4] Fix getting emails protected by Cloudflare

---
 ca_qc_sainte_anne_de_bellevue/people.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/ca_qc_sainte_anne_de_bellevue/people.py b/ca_qc_sainte_anne_de_bellevue/people.py
index 48cbbdab..e4a338e0 100644
--- a/ca_qc_sainte_anne_de_bellevue/people.py
+++ b/ca_qc_sainte_anne_de_bellevue/people.py
@@ -8,11 +8,20 @@
 
 class SainteAnneDeBellevuePersonScraper(CanadianScraper):
     def scrape(self):
+        def decode_email(e):
+            de = ""
+            k = int(e[:2], 16)
+
+            for i in range(2, len(e) - 1, 2):
+                de += chr(int(e[i : i + 2], 16) ^ k)
+
+            return de
+
         page = self.lxmlize(COUNCIL_PAGE)
 
         councillors = page.xpath('//div[@class="block text"]')
         assert len(councillors), "No councillors found"
-        for i, councillor in enumerate(councillors):
+        for councillor in councillors:
             name = councillor.xpath('.//div[@class="content-writable"]//strong/text()')[0]
             district = councillor.xpath(".//h2/text()")[0]
 
@@ -23,9 +32,11 @@ def scrape(self):
                 district = "District {}".format(re.search(r"\d+", district)[0])
                 role = "Conseiller"
 
+            encoded_email = councillor.xpath('.//@href[contains(., "email-protection")]')[0].split("#")[1]
+
             p = Person(primary_org="legislature", name=name, district=district, role=role)
             p.add_source(COUNCIL_PAGE)
 
             p.image = councillor.xpath(".//@src")[0]
-            p.add_contact("email", self.get_email(councillor))
+            p.add_contact("email", decode_email(encoded_email))
             yield p

From e2d1b3931bc47a580f2ed4850f3eba57a3a6801f Mon Sep 17 00:00:00 2001
From: Samuel Pei <samuel.pei@jmaconsulting.biz>
Date: Tue, 29 Oct 2024 14:36:27 -0400
Subject: [PATCH 2/4] All the councillors are in one div, so extract roles and
 districts from the h2 tags and names and emails from the p tags

---
 ca_qc_sainte_anne_de_bellevue/people.py | 55 ++++++++++++++++++-------
 1 file changed, 39 insertions(+), 16 deletions(-)

diff --git a/ca_qc_sainte_anne_de_bellevue/people.py b/ca_qc_sainte_anne_de_bellevue/people.py
index e4a338e0..e7b660b1 100644
--- a/ca_qc_sainte_anne_de_bellevue/people.py
+++ b/ca_qc_sainte_anne_de_bellevue/people.py
@@ -5,7 +5,6 @@
 
 COUNCIL_PAGE = "https://www.ville.sainte-anne-de-bellevue.qc.ca/fr/199/elus-municipaux"
 
-
 class SainteAnneDeBellevuePersonScraper(CanadianScraper):
     def scrape(self):
         def decode_email(e):
@@ -19,24 +18,48 @@ def decode_email(e):
 
         page = self.lxmlize(COUNCIL_PAGE)
 
-        councillors = page.xpath('//div[@class="block text"]')
+        councillors = page.xpath('//div[@class="col-md-12"]')[0]
         assert len(councillors), "No councillors found"
-        for councillor in councillors:
-            name = councillor.xpath('.//div[@class="content-writable"]//strong/text()')[0]
-            district = councillor.xpath(".//h2/text()")[0]
+        
+        roles_and_districts = councillors.xpath('.//h2/text()')
+        roles = []
+        districts = []
+        names = []
+        emails = []
 
-            if "Maire" in district:
-                district = "Sainte-Anne-de-Bellevue"
-                role = "Maire"
-            else:
-                district = "District {}".format(re.search(r"\d+", district)[0])
-                role = "Conseiller"
+        # Fill in roles and districts via h2 tags
+        for role in roles_and_districts:
+            role_and_district = role.split()
 
-            encoded_email = councillor.xpath('.//@href[contains(., "email-protection")]')[0].split("#")[1]
+            roles.append(role_and_district[0])
+    
+            if len(role_and_district) == 1:
+                districts.append("Sainte-Anne-de-Bellevue")
+            else:
+                districts.append("District " + role_and_district[2])
+    
+        # Fill in contact info via p tags.
+        contact_info = councillors.xpath('.//p[a[contains(@href, "@")]]')
+        for contact in contact_info:
+            contact = contact.text_content().split()
+            print(contact)
+            input()
+            name = " ".join(contact[:2])
+            names.append(name)
+    
+            email = contact[3]
+            email = email.replace("Président", "")
+            emails.append(email)
+        
+        print(roles)
+        print(districts)
+        print(names)
+        print(emails)
+        input()
 
-            p = Person(primary_org="legislature", name=name, district=district, role=role)
+        assert len(roles) == len(districts) == len(names) == len(emails), "Lists are not of equal length"
+        for i in range(len(roles)):
+            p = Person(primary_org="legislature", name=names[i], district=districts[i], role=roles[i])
             p.add_source(COUNCIL_PAGE)
-
-            p.image = councillor.xpath(".//@src")[0]
-            p.add_contact("email", decode_email(encoded_email))
+            p.add_contact("email", emails[i])
             yield p

From 8b408b9ec97c69c9958930eb4d7418cf093af52e Mon Sep 17 00:00:00 2001
From: Samuel Pei <samuel.pei@jmaconsulting.biz>
Date: Wed, 6 Nov 2024 11:14:46 -0500
Subject: [PATCH 3/4] Updated formatting; removed debug print/input calls and unused re import

---
 ca_qc_sainte_anne_de_bellevue/people.py | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/ca_qc_sainte_anne_de_bellevue/people.py b/ca_qc_sainte_anne_de_bellevue/people.py
index e7b660b1..5b6ba033 100644
--- a/ca_qc_sainte_anne_de_bellevue/people.py
+++ b/ca_qc_sainte_anne_de_bellevue/people.py
@@ -1,10 +1,9 @@
-import re
-
 from utils import CanadianPerson as Person
 from utils import CanadianScraper
 
 COUNCIL_PAGE = "https://www.ville.sainte-anne-de-bellevue.qc.ca/fr/199/elus-municipaux"
 
+
 class SainteAnneDeBellevuePersonScraper(CanadianScraper):
     def scrape(self):
         def decode_email(e):
@@ -20,8 +19,8 @@ def decode_email(e):
 
         councillors = page.xpath('//div[@class="col-md-12"]')[0]
         assert len(councillors), "No councillors found"
-        
-        roles_and_districts = councillors.xpath('.//h2/text()')
+
+        roles_and_districts = councillors.xpath(".//h2/text()")
         roles = []
         districts = []
         names = []
@@ -32,30 +31,22 @@ def decode_email(e):
             role_and_district = role.split()
 
             roles.append(role_and_district[0])
-    
+
             if len(role_and_district) == 1:
                 districts.append("Sainte-Anne-de-Bellevue")
             else:
                 districts.append("District " + role_and_district[2])
-    
+
         # Fill in contact info via p tags.
         contact_info = councillors.xpath('.//p[a[contains(@href, "@")]]')
         for contact in contact_info:
             contact = contact.text_content().split()
-            print(contact)
-            input()
             name = " ".join(contact[:2])
             names.append(name)
-    
+
             email = contact[3]
             email = email.replace("Président", "")
             emails.append(email)
-        
-        print(roles)
-        print(districts)
-        print(names)
-        print(emails)
-        input()
 
         assert len(roles) == len(districts) == len(names) == len(emails), "Lists are not of equal length"
         for i in range(len(roles)):

From 6ba3826af1d9f2607db5400f85387c536168464b Mon Sep 17 00:00:00 2001
From: Samuel Pei <samuel.pei@jmaconsulting.biz>
Date: Fri, 8 Nov 2024 14:26:18 -0500
Subject: [PATCH 4/4] Re-implemented scraper using the preceding-sibling XPath axis; removed decode_email helper

---
 ca_qc_sainte_anne_de_bellevue/people.py | 49 +++++++------------------
 1 file changed, 13 insertions(+), 36 deletions(-)

diff --git a/ca_qc_sainte_anne_de_bellevue/people.py b/ca_qc_sainte_anne_de_bellevue/people.py
index 5b6ba033..376a0baf 100644
--- a/ca_qc_sainte_anne_de_bellevue/people.py
+++ b/ca_qc_sainte_anne_de_bellevue/people.py
@@ -6,51 +6,28 @@
 
 class SainteAnneDeBellevuePersonScraper(CanadianScraper):
     def scrape(self):
-        def decode_email(e):
-            de = ""
-            k = int(e[:2], 16)
-
-            for i in range(2, len(e) - 1, 2):
-                de += chr(int(e[i : i + 2], 16) ^ k)
-
-            return de
-
         page = self.lxmlize(COUNCIL_PAGE)
 
-        councillors = page.xpath('//div[@class="col-md-12"]')[0]
+        councillors = page.xpath('//p[a[contains(@href, "@")]]')
         assert len(councillors), "No councillors found"
 
-        roles_and_districts = councillors.xpath(".//h2/text()")
-        roles = []
-        districts = []
-        names = []
-        emails = []
+        for councillor in councillors:
+            role = councillor.xpath("./preceding-sibling::h2[1]/text()")[0]
 
-        # Fill in roles and districts via h2 tags
-        for role in roles_and_districts:
-            role_and_district = role.split()
-
-            roles.append(role_and_district[0])
-
-            if len(role_and_district) == 1:
-                districts.append("Sainte-Anne-de-Bellevue")
+            if role == "Maire":
+                district = "Sainte-Anne-de-Bellevue"
             else:
-                districts.append("District " + role_and_district[2])
+                district = "District " + role.split()[2]
+                role = "Conseiller"
 
-        # Fill in contact info via p tags.
-        contact_info = councillors.xpath('.//p[a[contains(@href, "@")]]')
-        for contact in contact_info:
-            contact = contact.text_content().split()
-            name = " ".join(contact[:2])
-            names.append(name)
+            councillor = councillor.text_content().split()
 
-            email = contact[3]
+            name = " ".join(councillor[:2])
+            email = councillor[3]
             email = email.replace("Président", "")
-            emails.append(email)
 
-        assert len(roles) == len(districts) == len(names) == len(emails), "Lists are not of equal length"
-        for i in range(len(roles)):
-            p = Person(primary_org="legislature", name=names[i], district=districts[i], role=roles[i])
+            p = Person(primary_org="legislature", name=name, district=district, role=role)
             p.add_source(COUNCIL_PAGE)
-            p.add_contact("email", emails[i])
+            p.add_contact("email", email)
+
             yield p