Merge pull request openstates#1115 from loandy/hotfix/de
DE: Added handling for new markup format encountered.
Andy Lo committed May 11, 2016
2 parents c109638 + f72b491 commit 19ca590
Showing 1 changed file with 15 additions and 9 deletions.
openstates/de/bills.py: 24 changes (15 additions & 9 deletions)
@@ -3,10 +3,11 @@
 import lxml.html
 import scrapelib
 import actions
-from openstates.utils import LXMLMixin
 from billy.scrape import ScrapeError
 from billy.scrape.bills import BillScraper, Bill
 from billy.scrape.votes import Vote
+from openstates.utils import LXMLMixin
+
 
 class DEBillScraper(BillScraper, LXMLMixin):
     jurisdiction = 'de'
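The later hunks replace raw page.xpath(...) calls with the mixin's get_node / get_nodes helpers, which is why the LXMLMixin import moves here. As a rough sketch of the behavior those calls appear to rely on (the real helpers live in openstates.utils and may differ in detail):

# Sketch only, not the actual openstates.utils.LXMLMixin code: a single-node
# lookup that returns None instead of raising IndexError, and a multi-node
# lookup that returns whatever the query matched.
class LXMLMixinSketch(object):
    def get_node(self, base_node, xpath_query):
        matches = base_node.xpath(xpath_query)
        return matches[0] if matches else None

    def get_nodes(self, base_node, xpath_query):
        return base_node.xpath(xpath_query)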
@@ -48,14 +49,14 @@ def scrape_bill(self,link,chamber,session):
         except scrapelib.HTTPError:
             self.logger.warning('404. Apparently the bill hasn\'t been posted')
             return
-        nominee = page.xpath(".//div[@id='page_header']/text()")[0]
-        if nominee.strip().lower() == "nominee information":
+        nominee = self.get_node(page, './/div[@id="page_header"]/text()')
+        if nominee is not None and nominee.strip().lower() == "nominee information":
             self.logger.info("Nominee, skipping")
             return
 
-        bill_id = page.xpath(".//div[@align='center']")
+        bill_id = self.get_node(page, './/div[@align="center" or @style="text-align:center"]')
         try:
-            bill_id = bill_id[0].text_content().strip()
+            bill_id = bill_id.text_content().strip()
         except IndexError:
             self.logger.warning("Can't find bill number, skipping")
             return
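The reworked bill_id lookup above accepts two markup variants by matching either attribute in a single XPath expression. A small self-contained demonstration, using invented sample markup rather than Delaware's actual pages:

# Demo of the "either attribute" XPath from the hunk above; the markup
# strings are invented for illustration.
import lxml.html

old_style = lxml.html.fromstring(
    '<html><body><div align="center">HB 123</div></body></html>')
new_style = lxml.html.fromstring(
    '<html><body><div style="text-align:center">HB 123</div></body></html>')

query = './/div[@align="center" or @style="text-align:center"]'
for page in (old_style, new_style):
    node = page.xpath(query)[0]         # matches either markup variant
    print(node.text_content().strip())  # -> HB 123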
@@ -86,14 +87,22 @@ def scrape_bill(self,link,chamber,session):
         #there are no classes/ids or anything, so we're going to loop
         #through the individual tables and look for keywords
         #in the first td to tell us what we're looking at
-        tables = page.xpath('.//div[@id="page_content"]/table')
+        tables = self.get_nodes(page, './/div[@id="page_content"]/table')
 
         bill_title = None
         primary_sponsors = []
         cosponsors = []
         bill_url = None
         bill_documents = {}
         action_list = []
         vote_documents = {}
         sub_link = None
         bill_text_avail = False
 
+        if tables is None or not tables:
+            self.logger.warning('First xpath didn\'t work.')
+            tables = self.get_nodes(page, './/table[@style="width:837.0px"]/tr')
+
         for table in tables:
             tds = table.xpath('.//td')
             if len(tds) == 0:
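The new block above is the heart of the fix: when the old page_content tables are missing, the scraper retries with a query written for the new markup. A hedged sketch of that fallback pattern, assuming only that the scraper object exposes get_nodes and a logger as in the diff; find_bill_tables and the scraper argument are illustrative names:

# Illustrative sketch of the primary-then-fallback lookup added above.
def find_bill_tables(scraper, page):
    # Old markup: tables nested under the page_content div.
    tables = scraper.get_nodes(page, './/div[@id="page_content"]/table')
    if not tables:  # covers both None and an empty result
        scraper.logger.warning("Falling back to the new markup's table rows.")
        # New markup: rows of a single fixed-width table.
        tables = scraper.get_nodes(page, './/table[@style="width:837.0px"]/tr')
    return tables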
@@ -127,7 +136,6 @@ def scrape_bill(self,link,chamber,session):
                 amendments = tds[1].xpath('.//a')
                 for a in amendments:
                     amm = a.text
-                    self.logger.debug(amm)
                     amm_text = "Amendment".format(amm.strip())
                     amm_slg = "+".join(amm.split())
                     amm_link = text_base_url.format(session=session,
@@ ... @@
                 if "voting" in tds[0].text_content().lower():
                     self.find_vote(tds,vote_documents,"Amendment: ")
 
-
             elif title_text.startswith('engrossed version'):
                 if tds[1].text_content().strip():
                     engrossment_base = "http://legis.delaware.gov/LIS/lis{session}.nsf/EngrossmentsforLookup/{bill_id}/$file/Engross.html?open"
@@ -186,7 +193,6 @@ def scrape_bill(self,link,chamber,session):
 
             elif title_text.startswith('voting'):
                 self.find_vote(tds,vote_documents)
-
             elif title_text.startswith('actions history'):
                 action_list = tds[1].text_content().split("\n")
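The comment block earlier in the diff explains the overall strategy: with no ids or classes to key on, each table is classified by keywords in its first cell, which is what the elif chain above implements. A minimal sketch of that dispatch style; the keyword tuple below is illustrative, not the scraper's exact list:

# Minimal sketch of classifying a table by its first cell's text, in the
# spirit of the elif chain above; keywords are illustrative only.
def classify_table(table):
    tds = table.xpath('.//td')
    if not tds:
        return None
    title_text = tds[0].text_content().strip().lower()
    for keyword in ('engrossed version', 'voting', 'actions history'):
        if title_text.startswith(keyword):
            return keyword
    return None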
