Skip to content

Commit

Permalink
Got resolutions scraper working, proper alias for 2016 special for pdf filenames
Browse files Browse the repository at this point in the history
  • Loading branch information
showerst committed Aug 16, 2016
1 parent d862c15 commit 9f752d5
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 23 deletions.
4 changes: 2 additions & 2 deletions openstates/al/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
'name': '2015-2018',
'start_year': 2015,
'end_year': 2018,
'sessions': ['2015os','2015rs', '2015fs', '2015ss', '2016rs','2016ss','2017rs'],
'sessions': ['2015os','2015rs', '2015fs', '2015ss', '2016rs','2016fs','2017rs'],
}
],
'session_details': {
Expand Down Expand Up @@ -88,7 +88,7 @@
'internal_id': '1065',
'_scraped_name': 'Regular Session 2016',
},
'2016ss': {
'2016fs': {
'type': 'special',
'display_name': 'First Special Session 2016',
'internal_id': '1068',
Expand Down
36 changes: 15 additions & 21 deletions openstates/al/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,23 +107,12 @@ def _get_bill_list(self, url):
for _retry in range(self.retry_attempts):
html = self.get(url=url).text

#print html

doc = lxml.html.fromstring(html)

bills = doc.xpath('//table[@id="ContentPlaceHolder1_gvBills"]/tr')[1:]

resolutions = doc.xpath(
'//table[@id="ContentPlaceHolder1_gvResolutions"]/tr')[1:]

print doc.xpath(
'//span[@id="ContentPlaceHolder1_lblCount"]/font/text()'
)
listing = doc.xpath('//table[@id="ContentPlaceHolder1_gvBills"]/tr')[1:]

if bills and resolutions:
raise AssertionError("Found multiple bill types")
elif bills or resolutions:
return bills or resolutions
if listing:
return listing
elif doc.xpath(
'//span[@id="ContentPlaceHolder1_lblCount"]/font/text()'
) == ["0 Instruments", ]:
Expand All @@ -136,7 +125,6 @@ def _get_bill_list(self, url):
)
continue
else:
#print html
raise AssertionError("Bill list not found")

def _get_bill_response(self, url):
Expand Down Expand Up @@ -186,18 +174,21 @@ def scrape(self, session, chambers):

self.scrape_bill_list(BILL_LIST_URL)

#self._set_session(session)
self._set_session(session)

# Acquire and process a list of all resolutions
RESOLUTION_TYPE_URL = (
'http://alisondb.legislature.state.al.us/Alison/'
'SESSResosBySelectedStatus.aspx')
'SESSResosBySelectedStatus.aspx?BODYID=1755')
RESOLUTION_LIST_URL = (
'http://alisondb.legislature.state.al.us/Alison/'
'SESSResosList.aspx?STATUSCODES=Had%20First%20Reading'
'%20House%20of%20Origin&BODY=999999')

doc = lxml.html.fromstring(self.get(url=BILL_TYPE_URL).text)
resText = self.get(url=RESOLUTION_TYPE_URL).text

doc = lxml.html.fromstring(resText)

(viewstate, ) = doc.xpath('//input[@id="__VIEWSTATE"]/@value')
(viewstategenerator, ) = doc.xpath(
'//input[@id="__VIEWSTATEGENERATOR"]/@value')
Expand All @@ -207,9 +198,10 @@ def scrape(self, session, chambers):
'__EVENTARGUMENT': 'Select$0',
'__VIEWSTATE': viewstate,
'__VIEWSTATEGENERATOR': viewstategenerator,
'ctl00$ScriptManager1': 'ctl00$UpdatePanel1|ctl00$'
'ctl00$ScriptManager1': 'tctl00$UpdatePanel1|ctl00$'
'MainDefaultContent$gvStatus$ctl02$ctl00'
}

deb = self.post(url=RESOLUTION_TYPE_URL, data=form, allow_redirects=True)

self.scrape_bill_list(RESOLUTION_LIST_URL)
Expand Down Expand Up @@ -259,8 +251,10 @@ def scrape_bill_list(self, url):
continue
bill_doc = lxml.html.fromstring(bill_html)

title = bill_doc.xpath(
'//span[@id="ContentPlaceHolder1_lblShotTitle"]//text()')[0].strip()
if( bill_doc.xpath(
'//span[@id="ContentPlaceHolder1_lblShotTitle"]') ):
title = bill_doc.xpath(
'//span[@id="ContentPlaceHolder1_lblShotTitle"]')[0].text_content().strip()
if not title:
title = "[No title given by state]"
bill['title'] = title
Expand Down

0 comments on commit 9f752d5

Please sign in to comment.