Skip to content

Commit

Permalink
fix for cqc import
Browse files Browse the repository at this point in the history
not a fix for schools_ni import
  • Loading branch information
drkane committed Jan 8, 2024
1 parent 762ddb6 commit c110400
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 8 deletions.
40 changes: 37 additions & 3 deletions ftc/management/commands/import_schools_ni.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,15 @@ class Command(HTMLScraper):
}
orgtypes = ["Education Institution"]

def parse_file(self, response, source_url):
def fetch_file(self):
    """Fetch every start URL and store what ``_get_csv_file`` returns for it.

    ``self.files`` is reset to an empty dict. Then, for each URL in
    ``self.start_urls``:

    * the URL is requested with ``self.session.get`` (passing
      ``verify=self.verify_certificate``);
    * ``raise_for_status()`` is called on the response, so an HTTP error
      status aborts the run;
    * the URL is handed to ``self.set_access_url``;
    * ``self._get_csv_file`` is called with the response, the URL and the
      response's cookies, and its return value is stored in
      ``self.files`` under the URL.

    Returns:
        None. The results are left in ``self.files``.
    """
    self.files = {}
    for u in self.start_urls:
        r = self.session.get(u, verify=self.verify_certificate)
        r.raise_for_status()
        # Only reached when the initial request succeeded.
        self.set_access_url(u)
        # The cookies from the first response are passed on to the follow-up
        # request made by _get_csv_file.
        self.files[u] = self._get_csv_file(r, u, r.cookies)

def _get_csv_file(self, response, source_url, cookies=None):
post_params = {
"__EVENTARGUMENT": "",
"__EVENTTARGET": "",
Expand Down Expand Up @@ -67,11 +75,37 @@ def parse_file(self, response, source_url):
r = self.session.post(
self.start_urls[0],
data=post_params,
headers={
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-GB,en-US;q=0.7,en;q=0.3",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
# "Content-Length": "12677",
"Content-Type": "application/x-www-form-urlencoded",
# "Cookie": "ASP.NET_SessionId=wl4k0vncy22g1qqb3lwlq5my; CookiePersist=!+X2EE36OE7ulB+JCSgh2GD+ElYVd8ovUgXUOYcC+E/1pLtPiJ/NKfn8u4FX95gYy8NaGOmePK1nAx8c=; TS01690ce2=017f41f17b77ec278b7df07d7e6fa6192f9f768f3a4139b2bd6444df4b0c5d5e81d49d7c68a22f17141f2469d9bebe105edb30f805310290f2254d408e4595d7bd186b4c31955e3c3dbaf0d5002a97ad65d3e3f384; TSb21aa7bb027=08108c6895ab2000d5cc63184ef81a6c81c2679dd0dc3772f90e833efb2e48d90a1c9128d33c3ed808f2ff98a2113000caf8222c22a66dd680f58cd298e20631b8917e4aca7bdc8d1e1b0627e99b84082a024a5471a1aa3285a91d2cbb0670e6",
"DNT": "1",
"Host": "apps.education-ni.gov.uk",
"Origin": "https://apps.education-ni.gov.uk",
"Pragma": "no-cache",
"Referer": "https://apps.education-ni.gov.uk/appinstitutes/default.aspx",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
"Upgrade-Insecure-Requests": "1",
# "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
},
cookies=cookies,
)
r.raise_for_status()
return r

def parse_file(self, response, source_url):
try:
csv_text = r.text
csv_text = response.text
except AttributeError:
csv_text = r.body.decode(self.encoding)
csv_text = response.body.decode(self.encoding)

with io.StringIO(csv_text) as a:
csvreader = csv.DictReader(a)
Expand Down
9 changes: 4 additions & 5 deletions other_data/management/commands/import_cqc.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,13 @@ def parse_row(self, row):
this_model["CQCProvider"]["brand_id"] = this_model["CQCBrand"]["id"]

if this_model["CQCProvider"]["charity_number"]:
if this_model["CQCProvider"]["charity_number"].upper().startswith("SC"):
charity_number = str(this_model["CQCProvider"]["charity_number"])
if charity_number.upper().startswith("SC"):
this_model["CQCProvider"]["org_id"] = "GB-SC-{}".format(
this_model["CQCProvider"]["charity_number"].upper()
charity_number.upper()
)
else:
this_model["CQCProvider"]["org_id"] = "GB-CHC-{}".format(
this_model["CQCProvider"]["charity_number"]
)
this_model["CQCProvider"]["org_id"] = "GB-CHC-{}".format(charity_number)
elif this_model["CQCProvider"]["company_number"]:
this_model["CQCProvider"]["org_id"] = "GB-COH-{}".format(
this_model["CQCProvider"]["company_number"]
Expand Down

0 comments on commit c110400

Please sign in to comment.