Skip to content

Commit

Permalink
Updated SC bills to scrape prefiles
Browse files — browse the repository at this point in the history
  • Loading branch information
showerst committed Dec 15, 2016
1 parent fd639a3 commit 6a599fe
Showing 1 changed file with 30 additions and 1 deletion.
31 changes: 30 additions & 1 deletion openstates/sc/bills.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -63,9 +63,11 @@ class SCBillScraper(BillScraper):
# Index pages to crawl, keyed first by chamber ('lower' / 'upper') and then
# by index type.
# NOTE(review): both chambers point at the same prefile index page, and the
# "17" in "prefil17.php" looks session-specific -- confirm before reusing
# this table for another session.
urls = {
    'lower': {
        'daily-bill-index': 'http://www.scstatehouse.gov/hintro/hintros.php',
        'prefile-index': 'http://www.scstatehouse.gov/sessphp/prefil17.php',
    },
    'upper': {
        'daily-bill-index': 'http://www.scstatehouse.gov/sintro/sintros.php',
        'prefile-index': 'http://www.scstatehouse.gov/sessphp/prefil17.php',
    },
}

Expand Down Expand Up @@ -252,7 +254,7 @@ def scrape_details(self, bill_detail_url, session, chamber, bill_id):
def scrape(self, chamber, session):
# start with subjects
session_code = self.metadata['session_details'][session]['_code']
self.scrape_subjects(session_code)
#self.scrape_subjects(session_code)

# get bill index
index_url = self.urls[chamber]['daily-bill-index']
Expand All @@ -278,3 +280,30 @@ def scrape(self, chamber, session):
if bill_id.startswith(chamber_letter):
self.scrape_details(bill_a.get('href'), session, chamber,
bill_id)

prefile_url = self.urls[chamber]['prefile-index']
page = self.get(prefile_url).text
doc = lxml.html.fromstring(page)
doc.make_links_absolute(prefile_url)

# visit each day and extract bill ids
days = ''
if chamber == 'lower':
days = doc.xpath('//dd[contains(text(),"House")]/a/@href')
else:
days = doc.xpath('//dd[contains(text(),"Senate")]/a/@href')

for day_url in days:
try:
data = self.get(day_url).text
except scrapelib.HTTPError:
continue

doc = lxml.html.fromstring(data)
doc.make_links_absolute(day_url)

for bill_a in doc.xpath('//p/a[1]'):
bill_id = bill_a.text.replace('.', '')
if bill_id.startswith(chamber_letter):
self.scrape_details(bill_a.get('href'), session, chamber,
bill_id)

0 comments on commit 6a599fe

Please sign in to comment.