Skip to content

Commit

Permalink
Got resolutions scraper working, proper alias for 2016 special for pdf filenames
Browse files Browse the repository at this point in the history
  • Loading branch information
showerst committed Aug 16, 2016
1 parent d862c15 commit 9f752d5
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 23 deletions.
4 changes: 2 additions & 2 deletions openstates/al/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
'name': '2015-2018',
'start_year': 2015,
'end_year': 2018,
'sessions': ['2015os','2015rs', '2015fs', '2015ss', '2016rs','2016ss','2017rs'],
'sessions': ['2015os','2015rs', '2015fs', '2015ss', '2016rs','2016fs','2017rs'],
}
],
'session_details': {
Expand Down Expand Up @@ -88,7 +88,7 @@
'internal_id': '1065',
'_scraped_name': 'Regular Session 2016',
},
'2016ss': {
'2016fs': {
'type': 'special',
'display_name': 'First Special Session 2016',
'internal_id': '1068',
Expand Down
36 changes: 15 additions & 21 deletions openstates/al/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,23 +107,12 @@ def _get_bill_list(self, url):
for _retry in range(self.retry_attempts):
html = self.get(url=url).text

#print html

doc = lxml.html.fromstring(html)

bills = doc.xpath('//table[@id="ContentPlaceHolder1_gvBills"]/tr')[1:]

resolutions = doc.xpath(
'//table[@id="ContentPlaceHolder1_gvResolutions"]/tr')[1:]

print doc.xpath(
'//span[@id="ContentPlaceHolder1_lblCount"]/font/text()'
)
listing = doc.xpath('//table[@id="ContentPlaceHolder1_gvBills"]/tr')[1:]

if bills and resolutions:
raise AssertionError("Found multiple bill types")
elif bills or resolutions:
return bills or resolutions
if listing:
return listing
elif doc.xpath(
'//span[@id="ContentPlaceHolder1_lblCount"]/font/text()'
) == ["0 Instruments", ]:
Expand All @@ -136,7 +125,6 @@ def _get_bill_list(self, url):
)
continue
else:
#print html
raise AssertionError("Bill list not found")

def _get_bill_response(self, url):
Expand Down Expand Up @@ -186,18 +174,21 @@ def scrape(self, session, chambers):

self.scrape_bill_list(BILL_LIST_URL)

#self._set_session(session)
self._set_session(session)

# Acquire and process a list of all resolutions
RESOLUTION_TYPE_URL = (
'http://alisondb.legislature.state.al.us/Alison/'
'SESSResosBySelectedStatus.aspx')
'SESSResosBySelectedStatus.aspx?BODYID=1755')
RESOLUTION_LIST_URL = (
'http://alisondb.legislature.state.al.us/Alison/'
'SESSResosList.aspx?STATUSCODES=Had%20First%20Reading'
'%20House%20of%20Origin&BODY=999999')

doc = lxml.html.fromstring(self.get(url=BILL_TYPE_URL).text)
resText = self.get(url=RESOLUTION_TYPE_URL).text

doc = lxml.html.fromstring(resText)

(viewstate, ) = doc.xpath('//input[@id="__VIEWSTATE"]/@value')
(viewstategenerator, ) = doc.xpath(
'//input[@id="__VIEWSTATEGENERATOR"]/@value')
Expand All @@ -207,9 +198,10 @@ def scrape(self, session, chambers):
'__EVENTARGUMENT': 'Select$0',
'__VIEWSTATE': viewstate,
'__VIEWSTATEGENERATOR': viewstategenerator,
'ctl00$ScriptManager1': 'ctl00$UpdatePanel1|ctl00$'
'ctl00$ScriptManager1': 'tctl00$UpdatePanel1|ctl00$'
'MainDefaultContent$gvStatus$ctl02$ctl00'
}

deb = self.post(url=RESOLUTION_TYPE_URL, data=form, allow_redirects=True)

self.scrape_bill_list(RESOLUTION_LIST_URL)
Expand Down Expand Up @@ -259,8 +251,10 @@ def scrape_bill_list(self, url):
continue
bill_doc = lxml.html.fromstring(bill_html)

title = bill_doc.xpath(
'//span[@id="ContentPlaceHolder1_lblShotTitle"]//text()')[0].strip()
if( bill_doc.xpath(
'//span[@id="ContentPlaceHolder1_lblShotTitle"]') ):
title = bill_doc.xpath(
'//span[@id="ContentPlaceHolder1_lblShotTitle"]')[0].text_content().strip()
if not title:
title = "[No title given by state]"
bill['title'] = title
Expand Down

0 comments on commit 9f752d5

Please sign in to comment.