Skip to content

Commit

Permalink
Added 2017 session and fixed house bill# and action parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
showerst committed Dec 1, 2016
1 parent 7088440 commit 8d5ca8e
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 6 deletions.
12 changes: 12 additions & 0 deletions openstates/mo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@
'start_year': 2015,
'end_year': 2016,
},
{
'name': '2017-2018',
'sessions': ['2017'],
'start_year': 2017,
'end_year': 2018,
},
],
# General Assembly sessions convene the Wed. following the first Mon.
# of January and adjourn May 30.
Expand Down Expand Up @@ -80,6 +86,12 @@
'end_date': datetime.date(2016,5,30),
'display_name': '2016 Regular Session',
},
'2017': {
'type': 'primary',
'start_date': datetime.date(2017,1,4),
'end_date': datetime.date(2017,5,12),
'display_name': '2017 Regular Session',
},
},
feature_flags = ['subjects', 'influenceexplorer'],
_ignored_scraped_sessions = [
Expand Down
19 changes: 13 additions & 6 deletions openstates/mo/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,13 +275,12 @@ def _scrape_house_subjects(self, session):
self._subjects[bill_id].append(subject.text)

def _parse_house_actions(self, bill, url):
url = re.sub("BillActions", "BillActionsPrn", url)
bill.add_source(url)
actions_page = self.get(url).text
actions_page = lxml.html.fromstring(actions_page)
rows = actions_page.xpath('//table/tr')

for row in rows[1:]:
for row in rows:
# A row with a date in its first td starts a new action;
# otherwise, the row is a continuation of the description from the
# previous action.
Expand Down Expand Up @@ -337,11 +336,19 @@ def _parse_house_bill(self, url, session):
url = re.sub("billsummary", "billsummaryprn", url)
url = '%s/%s' % (self._senate_base_url,url)

# The bill URL now points to a page that only iframes the real content, so swap in the URL of the actual bill page.

url = url.replace('Bill.aspx','BillContent.aspx')
url = url.replace('&code=R','&code=R&style=new')

# Example, before rewrite: http://www.house.mo.gov/Bill.aspx?bill=HB26&year=2017&code=R
# Example, after rewrite:  http://www.house.mo.gov/BillContent.aspx?bill=HB26&year=2017&code=R&style=new

bill_page = self.get(url).text
bill_page = lxml.html.fromstring(bill_page)
bill_page.make_links_absolute(url)

bill_id = bill_page.xpath('//*[@class="entry-title"]')
bill_id = bill_page.xpath('//*[@class="entry-title"]/div')
if len(bill_id) == 0:
self.log("WARNING: bill summary page is blank! (%s)" % url)
self._bad_urls.append(url)
Expand Down Expand Up @@ -440,7 +447,7 @@ def _parse_house_bill(self, url, session):
# actions_link = re.sub("content", "print", actions_link)

actions_link, = bill_page.xpath(
"//a[contains(@href, 'BillActions.aspx')]/@href")
"//a[contains(@href, 'BillActionsPrn.aspx')]/@href")
self._parse_house_actions(bill, actions_link)

# get bill versions
Expand Down Expand Up @@ -512,6 +519,6 @@ def scrape(self, chamber, year):
getattr(self, '_scrape_' + chamber + '_chamber')(year)

if len(self._bad_urls) > 0:
self.warn('WARNINGS:')
self.warning('WARNINGS:')
for url in self._bad_urls:
self.warn('{}'.format(url))
self.warning('{}'.format(url))

0 comments on commit 8d5ca8e

Please sign in to comment.