From a7d4dd2d3673ebd5dbfe60c54aa55918e689e4c9 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Sat, 21 Sep 2024 03:07:34 -0400 Subject: [PATCH] chore: Use Scraper.get instead of requests.get --- ca_mb_winnipeg/people.py | 4 +--- ca_nb_moncton/people.py | 4 +--- ca_on_caledon/people.py | 4 +--- ca_on_chatham_kent/people.py | 3 +-- ca_on_windsor/people.py | 4 +--- ca_qc_brossard/people.py | 4 +--- disabled/ca_nl_municipalities/people.py | 3 +-- disabled/ca_ns_municipalities/people.py | 3 +-- disabled/ca_sk_municipalities/people.py | 3 +-- disabled/ca_yt_municipalities/people.py | 3 +-- 10 files changed, 10 insertions(+), 25 deletions(-) diff --git a/ca_mb_winnipeg/people.py b/ca_mb_winnipeg/people.py index 16e2af66..c3a2daa7 100644 --- a/ca_mb_winnipeg/people.py +++ b/ca_mb_winnipeg/people.py @@ -1,7 +1,5 @@ import json -import requests - from utils import CanadianPerson as Person from utils import CanadianScraper @@ -12,7 +10,7 @@ class WinnipegPersonScraper(CanadianScraper): def scrape(self): # from https://data.winnipeg.ca/Council-Services/Council-Data/r4tk-7dip/about_data api_url = "https://data.winnipeg.ca/resource/r4tk-7dip.json" - data = json.loads(requests.get(api_url).content) + data = json.loads(self.get(api_url).content) assert len(data), "No councillors found via API" page = self.lxmlize(COUNCIL_PAGE) diff --git a/ca_nb_moncton/people.py b/ca_nb_moncton/people.py index e13a7aee..aa47ed0f 100644 --- a/ca_nb_moncton/people.py +++ b/ca_nb_moncton/people.py @@ -1,8 +1,6 @@ import json from collections import defaultdict -import requests - from utils import CanadianPerson as Person from utils import CanadianScraper @@ -13,7 +11,7 @@ class MonctonPersonScraper(CanadianScraper): def scrape(self): seat_numbers = defaultdict(int) - data = json.loads(requests.get(API_URL).content)["features"] + data = json.loads(self.get(API_URL).content)["features"] assert len(data), "No councillors found" for item in data: diff --git 
a/ca_on_caledon/people.py b/ca_on_caledon/people.py index cbf51335..837019c1 100644 --- a/ca_on_caledon/people.py +++ b/ca_on_caledon/people.py @@ -1,7 +1,5 @@ import re -import requests - from utils import CanadianPerson as Person from utils import CanadianScraper @@ -33,7 +31,7 @@ def scrape(self): # phone numbers populated by JS request contact_num = page.xpath('//div[@class="contactBody"]/div/@id')[0].replace("contactEntry_", "") - contact_data = requests.get( + contact_data = self.get( f"https://www.caledon.ca//Modules/Contact/services/GetContactHTML.ashx?isMobile=false&param={contact_num}&lang=en" ).text voice = re.findall(r"(?<=tel://)\d+(?=\">)", contact_data) diff --git a/ca_on_chatham_kent/people.py b/ca_on_chatham_kent/people.py index 5ddd2165..c5a02d6a 100644 --- a/ca_on_chatham_kent/people.py +++ b/ca_on_chatham_kent/people.py @@ -1,7 +1,6 @@ import re from collections import defaultdict -import requests from lxml import etree from utils import CanadianPerson as Person @@ -19,7 +18,7 @@ def scrape(self): headers = {"content-type": "text/xml"} body = 'councillorsByWard50' - response = requests.post(url=COUNCIL_DATA_URL, data=body, headers=headers) + response = self.post(url=COUNCIL_DATA_URL, data=body, headers=headers) page = etree.fromstring(response.content) # noqa: S320 namespace = {"z": "#RowsetSchema", "rs": "urn:schemas-microsoft-com:rowset"} diff --git a/ca_on_windsor/people.py b/ca_on_windsor/people.py index 707da754..7ee6649c 100644 --- a/ca_on_windsor/people.py +++ b/ca_on_windsor/people.py @@ -1,7 +1,5 @@ import json -import requests - from utils import CanadianPerson as Person from utils import CanadianScraper @@ -12,7 +10,7 @@ class WindsorPersonScraper(CanadianScraper): def scrape(self): page = self.lxmlize(COUNCIL_PAGE) data_url = page.xpath('//comment()[contains(., "SITE JS")]/following-sibling::script/@src')[0] - data = json.loads(requests.get(data_url).text.split(" = ")[1]) + data = json.loads(self.get(data_url).text.split(" = ")[1])
nav_items = [] for item in data: if item["RollupType"] == "SidebarNavigation": diff --git a/ca_qc_brossard/people.py b/ca_qc_brossard/people.py index d82df70d..348bae07 100644 --- a/ca_qc_brossard/people.py +++ b/ca_qc_brossard/people.py @@ -1,8 +1,6 @@ import json import re -import requests - from utils import CanadianPerson as Person from utils import CanadianScraper @@ -33,7 +31,7 @@ def get_children(parent_id, element_dict): return return_list # The whole page is rendered in javascript and stored as a massive json object - page = requests.get(DATA_PAGE) + page = self.get(DATA_PAGE) page = json.loads(page.content) containers = page["content"].values() for container in containers: diff --git a/disabled/ca_nl_municipalities/people.py b/disabled/ca_nl_municipalities/people.py index 73262f79..499b0e73 100644 --- a/disabled/ca_nl_municipalities/people.py +++ b/disabled/ca_nl_municipalities/people.py @@ -3,7 +3,6 @@ import subprocess import tempfile -import requests from pupa.scrape import Organization from utils import CanadianPerson as Person @@ -17,7 +16,7 @@ def scrape(self): page = self.lxmlize(COUNCIL_PAGE) url = page.xpath('//a[contains(text(),"Municipal Directory")]/@href')[0] - response = requests.get(url).content + response = self.get(url).content with tempfile.NamedTemporaryFile(delete_on_close=False) as pdf: pdf.write(response) diff --git a/disabled/ca_ns_municipalities/people.py b/disabled/ca_ns_municipalities/people.py index 9ed12739..2d8b9fbe 100644 --- a/disabled/ca_ns_municipalities/people.py +++ b/disabled/ca_ns_municipalities/people.py @@ -3,7 +3,6 @@ import subprocess import tempfile -import requests from pupa.scrape import Organization from utils import CanadianPerson as Person @@ -14,7 +13,7 @@ class NovaScotiaMunicipalitiesPersonScraper(CanadianScraper): def scrape(self): - response = requests.get(COUNCIL_PAGE).content + response = self.get(COUNCIL_PAGE).content with tempfile.NamedTemporaryFile(delete_on_close=False) as pdf: pdf.write(response) 
diff --git a/disabled/ca_sk_municipalities/people.py b/disabled/ca_sk_municipalities/people.py index 6f96d1ce..b162995d 100644 --- a/disabled/ca_sk_municipalities/people.py +++ b/disabled/ca_sk_municipalities/people.py @@ -3,7 +3,6 @@ import subprocess import tempfile -import requests from pupa.scrape import Organization from utils import CanadianPerson as Person @@ -15,7 +14,7 @@ class SaskatchewanMunicipalitiesPersonScraper(CanadianScraper): def scrape(self): - response = requests.get(COUNCIL_PAGE).read() + response = self.get(COUNCIL_PAGE).read() with tempfile.NamedTemporaryFile(delete_on_close=False) as pdf: pdf.write(response) diff --git a/disabled/ca_yt_municipalities/people.py b/disabled/ca_yt_municipalities/people.py index 2cd79127..7c7ba0af 100644 --- a/disabled/ca_yt_municipalities/people.py +++ b/disabled/ca_yt_municipalities/people.py @@ -3,7 +3,6 @@ import subprocess import tempfile -import requests from pupa.scrape import Organization from utils import CanadianPerson as Person @@ -14,7 +13,7 @@ class YukonMunicipalitiesPersonScraper(CanadianScraper): def scrape(self): - response = requests.get(COUNCIL_PAGE).content + response = self.get(COUNCIL_PAGE).content with tempfile.NamedTemporaryFile(delete_on_close=False) as pdf: pdf.write(response)