Skip to content

Commit

Permalink
Added code to scrape prefiled bills if within prefile timespan
Browse files Browse the repository at this point in the history
  • Loading branch information
showerst committed Aug 9, 2016
1 parent f886e39 commit e384e25
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 6 deletions.
18 changes: 17 additions & 1 deletion openstates/ky/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@
'2015RS', '2016RS',
]
},
{
'name': '2017-2018',
'start_year': 2017,
'end_year': 2018,
'sessions': [
'2017RS',
]
},
],
'session_details': {
'2011 Regular Session': {
Expand Down Expand Up @@ -108,8 +116,16 @@
'display_name': '2016 Regular Session',
'_scraped_name': '2016 Regular Session',
},
'2017RS': {
'type': 'primary',
'start_date': datetime.date(2017, 1, 5),
'end_date': datetime.date(2017, 4, 12),
'prefile_start_date': datetime.date(2016, 8, 1),
'display_name': '2017 Regular Session',
'_scraped_name': '2017 Regular Session',
},
},
'feature_flags': ['subjects', 'events', 'influenceexplorer'],
'feature_flags': ['subjects', 'events', 'influenceexplorer','prefiles'],
'_ignored_scraped_sessions': [],
}

Expand Down
31 changes: 26 additions & 5 deletions openstates/ky/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,35 @@ def scrape_subjects(self, session):
self._subjects[bill.replace(' ', '')].append(subject)

def scrape(self, chamber, session):
    """Scrape bills for one chamber of ``session``.

    While the session's prefile window is open (today is on or after its
    ``prefile_start_date`` and not later than its ``start_date``), only the
    prefiled bill listing is scraped.  Otherwise the subjects, the
    session's bill list and the bill lists of any ``sub_sessions`` are
    scraped.

    :param chamber: chamber identifier, passed through to the list scrapers.
    :param session: key into ``self.metadata['session_details']``.
    """
    session_details = self.metadata['session_details']
    details = session_details[session]

    # Bill page markup changed starting with the 2016 regular session.
    # Assign the flag in both directions so a value left over from a
    # previous call for a newer session cannot leak into an older one.
    self._is_post_2016 = (
        details['start_date'] >= session_details['2016RS']['start_date'])

    # KY lists prefiled bills on a separate page.
    today = datetime.date.today()
    prefile_start = details.get('prefile_start_date')
    in_prefile_window = (
        prefile_start is not None
        and prefile_start <= today <= details['start_date'])

    if in_prefile_window:
        self.scrape_prefile_list(chamber, session)
        return

    # Regular scrape: run exactly once, and only outside the prefile window.
    self.scrape_subjects(session)
    self.scrape_session(chamber, session)
    for sub in details.get('sub_sessions', []):
        self.scrape_session(chamber, sub)

def scrape_prefile_list(self, chamber, session):
    """Scrape the prefiled-bill listing for ``chamber`` in ``session``.

    Builds the URL of the prefiled listing page and hands it to
    ``self.scrape_bill_list``.  ``'upper'`` uses the Senate sponsor page,
    ``'lower'`` the House sponsor page, and any other value falls back to
    the combined prefiled bills page.

    :param chamber: ``'upper'`` or ``'lower'``.
    :param session: session id such as ``'2017RS'``.
    """
    # The record directory is the session id without the century
    # ('2017RS' -> '17RS').
    # NOTE(review): only the 17RS prefile pages are known to exist; the
    # same layout is assumed for other sessions -- confirm before relying
    # on it.
    base_url = 'http://www.lrc.ky.gov/record/%s/prefiled/' % session[2:]

    chamber_pages = {
        'upper': 'prefiled_sponsor_senate.htm',
        'lower': 'prefiled_sponsor_house.htm',
    }
    page = chamber_pages.get(chamber, 'prefiled_bills.htm')

    self.scrape_bill_list(chamber, session, base_url + page)

def scrape_session(self, chamber, session):
bill_url = session_url(session) + "bills_%s.htm" % chamber_abbr(chamber)
Expand All @@ -73,6 +93,7 @@ def scrape_bill_list(self, chamber, session, url):
else:
bill_id = bill_abbr + bill_id

bill_id = bill_id.replace('*','')
self.parse_bill(chamber, session, bill_id,
link.attrib['href'])

Expand Down

0 comments on commit e384e25

Please sign in to comment.