Skip to content

Commit

Permalink
Fix Mississauga scraper since a new mayor has been elected
Browse files (browse the repository at this point in the history)
  • Loading branch information
seamuslee001 committed Oct 28, 2024
1 parent a370b4c commit 5cd9c90
Showing 1 changed file with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions ca_on_mississauga/people.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,22 @@ def scrape(self):
if "vacant" not in councillor_url.xpath(".//div//div[1]/text()")[0].lower():
yield self.councillor_data(councillor_url.attrib["href"])

mayor_page = self.lxmlize(MAYOR_PAGE)
mayor_name = mayor_page.xpath('//*[@id="com-main"]/div/div/div/h1/text()')[0]
if "vacant" not in mayor_name.lower():
yield self.mayor_data(MAYOR_PAGE)
mayor_url = page.xpath('//li/a[contains(@href, "mayor")]')[0]
if "vacant" not in mayor_url.xpath(".//div//div[1]/text()")[0].lower():
yield self.mayor_data(mayor_url.attrib["href"])

def councillor_data(self, url):
page = self.lxmlize(url)

name_district = page.xpath('//*[@id="com-main"]/div/div/div/h1/text()')[0]
hyphen = name_district.find("Councillor")
if hyphen == -1:
hyphen = 9
district = name_district[: hyphen - 3]
name = name_district[hyphen:]
mayor = name.find("Deputy")
if mayor != -1:
name = name[27:]
bracket = name.find("(")
if bracket != -1:
name = name[:bracket]
Expand All @@ -47,9 +51,9 @@ def councillor_data(self, url):
def mayor_data(self, url):
page = self.lxmlize(url)

name_text = page.xpath('//*[@id="com-main"]/div/div/div/h1/text()')[0]
name = name_text.split(",")[0]
photo = page.xpath('//img[contains(@src, "mayor")]/@src')[0]
name = page.xpath('//*[@id="com-main"]/div/div/div/h1/text()')[0]
name = name[8:]
photo = page.xpath('//*[@id="65a01af8598b7"]/p[1]/img/@src')[0]

p = Person(primary_org="legislature", name=name, district="Mississauga", role="Mayor")
p.add_source(url)
Expand Down

0 comments on commit 5cd9c90

Please sign in to comment.