Skip to content

Commit

Permalink
fixup! Add scraper for new Scottish Parliament site
Browse files (browse the repository at this point in the history)
  • Loading branch information
ajparsons committed Apr 22, 2024
1 parent 20cc60c commit 012baa0
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions pyscraper/sp_2024/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,12 +215,13 @@ def process_raw_html(html: str, agenda_item_url: str):
parent.extend(before)

# create a new speech element with the content after the division
- new_speech = soup.new_tag("speech")
- # get all attrbs from original speech
- for k, v in speaker.attrs.items():
- new_speech[k] = v
- new_speech.extend(after)
- parent.insert_after(new_speech)
+ if len(after) > 0:
+ new_speech = soup.new_tag("speech")
+ # get all attrbs from original speech
+ for k, v in speaker.attrs.items():
+ new_speech[k] = v
+ new_speech.extend(after)
+ division.insert_after(new_speech)

# convert soup into etree
return etree.fromstring(str(soup))
Expand Down

0 comments on commit 012baa0

Please sign in to comment.