From 0922876ebba07006f92ce44e5adfd41675964bff Mon Sep 17 00:00:00 2001
From: Seamus Lee <seamuslee001@gmail.com>
Date: Mon, 4 Nov 2024 17:52:40 +1100
Subject: [PATCH] Modify scraper as per James to be less breaky in future

---
 ca_on_mississauga/people.py | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/ca_on_mississauga/people.py b/ca_on_mississauga/people.py
index b903579b..42e2238f 100644
--- a/ca_on_mississauga/people.py
+++ b/ca_on_mississauga/people.py
@@ -2,7 +2,6 @@
 from utils import CanadianScraper
 
 COUNCIL_PAGE = "http://www.mississauga.ca/portal/cityhall/mayorandcouncil"
-MAYOR_PAGE = "http://www.mississauga.ca/portal/cityhall/mayorsoffice"
 CONTACT_PAGE = "http://www.mississauga.ca/portal/helpfeedback/contactus"
 
 
@@ -24,17 +23,14 @@ def councillor_data(self, url):
         page = self.lxmlize(url)
 
         name_district = page.xpath('//*[@id="com-main"]/div/div/div/h1/text()')[0]
-        hyphen = name_district.find("Councillor")
-        if hyphen == -1:
-            hyphen = 9
-        district = name_district[: hyphen - 3]
-        name = name_district[hyphen:]
-        mayor = name.find("Deputy")
-        if mayor != -1:
-            name = name[27:]
-        bracket = name.find("(")
-        if bracket != -1:
-            name = name[:bracket]
+        name_district_parts = name_district.split()
+        district = f"{name_district_parts[0]} {name_district_parts[1]}"
+        # Drop the first three tokens: the two-word district plus the '-' separator expected after it.
+        name_district_parts.pop(0)
+        name_district_parts.pop(0)
+        name_district_parts.pop(0)
+        name = " ".join(name_district_parts)
+        name = name.replace("Councillor and Deputy Mayor", "").strip()
         email = self.get_email(page, '//section[contains(@class, "module-content")]')
         photo = page.xpath(
             '//section[contains(@class, "module-content")]/p[1]/img/@src|//section[contains(@class, "module-content")]/p[1]/b/img/@src|//section[contains(@class, "module-content")]/p[1]/strong/img/@src'
@@ -52,7 +48,7 @@ def mayor_data(self, url):
         page = self.lxmlize(url)
 
         name = page.xpath('//*[@id="com-main"]/div/div/div/h1/text()')[0]
-        name = name[8:]
+        name = name.replace("Mayor – ", "").strip()
         photo = page.xpath('//*[@id="65a01af8598b7"]/p[1]/img/@src')[0]
 
         p = Person(primary_org="legislature", name=name, district="Mississauga", role="Mayor")