Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/sunlightlabs/openstates
Browse files (browse the repository at this point in the history)
…into al-2017
  • Loading branch information
showerst committed Aug 19, 2016
2 parents 9f752d5 + 191d7c0 commit 8ab12c9
Show file tree
Hide file tree
Showing 12 changed files with 390 additions and 298 deletions.
7 changes: 5 additions & 2 deletions openstates/ak/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def scrape(self, chamber, session):
for abbr in bill_abbrs:
bill_type = bill_types[abbr[1:]]
bill_list_url = ('http://www.legis.state.ak.us/basis/range_multi'
'.asp?session=%s&bill1=%s1&bill2=%s999' %
'.asp?session=%s&bill1=%s1&bill2=%s9999' %
(session, abbr, abbr)
)
doc = lxml.html.fromstring(self.get(bill_list_url).text)
Expand Down Expand Up @@ -207,7 +207,10 @@ def parse_vote(self, bill, action, act_chamber, act_date, url,

html = self.get(url).text
doc = lxml.html.fromstring(html)


if len(doc.xpath('//pre')) < 2:
return

# Find all chunks of text representing voting reports.
votes_text = doc.xpath('//pre')[1].text_content()
votes_text = re_vote_text.split(votes_text)
Expand Down
1 change: 1 addition & 0 deletions openstates/ca/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
Rule(r'Approved by the Governor with item veto',
'governor:vetoed:line-item'),
Rule('Vetoed by Governor', 'governor:vetoed'),
Rule('Vetoed by the Governor','governor:vetoed'),
Rule(r'To Governor', 'governor:received'),

Rule(r'amendments concurred in', 'amendment:passed'),
Expand Down
154 changes: 86 additions & 68 deletions openstates/ca/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
from .models import CABill
from .actions import CACategorizer


SPONSOR_TYPES = {'LEAD_AUTHOR': 'primary',
'COAUTHOR': 'cosponsor',
'PRINCIPAL_COAUTHOR': 'primary'}


def clean_title(s):
# replace smart quote characters
s = s.replace(u'\xe2\u20ac\u201c', '-')
Expand All @@ -38,165 +38,176 @@ def clean_title(s):

# Committee codes used in action chamber text.
committee_data_upper = [
#('CZ09', 'Standing Committee on Floor Analyses'),
('Standing Committee on Governance and Finance',
'CS73', [u'Gov. & F.']),
'CS73', [u'GOV. & F.', u'Gov. & F.']),

('Standing Committee on Energy, Utilities and Communications',
'CS71', [u'E. U. & C.', u'E., U. & C', 'E., U., & C.']),
'CS71', [u'E., U., & C.']),

('Standing Committee on Education',
'CS44', [u'ED.']),
'CS44', [u'ED.']),

('Standing Committee on Appropriations',
'CS61', [u'APPR.']),
'CS61', [u'APPR.']),

('Standing Committee on Labor and Industrial Relations',
'CS51', [u'L. & I.R.']),
'CS51', [u'L. & I.R.']),

('Standing Committee on Elections and Constitutional Amendments',
'CS45', [u'E. & C.A.']),
'CS45', [u'E. & C.A.']),

('Standing Committee on Environmental Quality',
'CS64', [u'E.Q.']),
'CS64', [u'E.Q.']),

('Standing Committee on Natural Resources And Water',
'CS55', [u'N.R. & W.']),
'CS55', [u'N.R. & W.']),

('Standing Committee on Public Employment and Retirement',
'CS56', [u'P.E. & R.']),
'CS56', [u'P.E. & R.']),

('Standing Committee on Governmental Organization',
'CS48', [u'G.O.']),
'CS48', [u'G.O.']),

('Standing Committee on Insurance',
'CS70', [u'INS.']),
'CS70', [u'INS.']),

('Standing Committee on Public Safety',
'CS72', [u'PUB. S.']),
'CS72', [u'PUB. S.']),

('Standing Committee on Judiciary',
'CS53', [u'JUD.']),
'CS53', [u'JUD.']),

('Standing Committee on Health',
'CS60', [u'HEALTH.']),
'CS60', [u'HEALTH']),

('Standing Committee on Transportation and Housing',
'CS59', [u'T. & H.']),
'CS59', [u'T. & H.']),

('Standing Committee on Business, Professions and Economic Development',
'CS42', [u'B., P. & E.D.']),
'CS42', [u'B., P. & E.D.']),

('Standing Committee on Agriculture',
'CS40', [u'AGRI.']),
'CS40', [u'AGRI.']),

('Standing Committee on Banking and Financial Institutions',
'CS69', [u'B. & F.I.']),
'CS69', [u'B. & F.I.']),

('Standing Committee on Veterans Affairs',
'CS66', [u'V.A.']),
'CS66', [u'V.A.']),

('Standing Committee on Budget and Fiscal Review',
'CS62', [u'B. & F.R.']),
'CS62', [u'B. & F.R.']),

('Standing Committee on Human Services',
'CS74', [u'HUM. S.', u'HUMAN S.']),
'CS74', [u'HUM. S.', u'HUMAN S.']),

('Standing Committee on Rules',
'CS58', [u'RLS.']),
]
'CS58', [u'RLS.']),

('Extraordinary Committee on Transportation and Infrastructure Development',
'CS67', [u'T. & I.D.']),
]

committee_data_lower = [
# LOWER
('Standing Committee on Rules',
'CX20', [u'RLS.']),
#('assembly floor analysis', 'CZ01', []),
'CX20', [u'RLS.']),

('Standing Committee on Revenue and Taxation',
'CX19', [u'REV. & TAX']),
'CX19', [u'REV. & TAX']),

('Standing Committee on Natural Resources',
'CX16', [u'NAT. RES.']),
'CX16', [u'NAT. RES.']),

('Standing Committee on Appropriations',
'CX25', [u'APPR.']),
'CX25', [u'APPR.']),

('Standing Committee on Insurance',
'CX28', ['INS.']),
'CX28', [u'INS.']),

('Standing Committee on Utilities and Commerce',
'CX23', [u'U. & C.']),
'CX23', [u'U. & C.']),

('Standing Committee on Education',
'CX03', [u'ED.']),
'CX03', [u'ED.']),

('Standing Committee on Public Safety',
'CX18', [u'PUB. S.']),
'CX18', [u'PUB. S.']),

('Standing Committee on Elections and Redistricting',
'CX04', [u'E. & R.']),
'CX04', [u'E. & R.']),

('Standing Committee on Judiciary',
'CX13', [u'JUD.', 'Jud.']),
'CX13', [u'JUD.']),

('Standing Committee on Higher Education',
'CX09', [u'HIGHER ED.']),
'CX09', [u'HIGHER ED.']),

('Standing Committee on Health',
'CX08', [u'HEALTH']),
'CX08', [u'HEALTH']),

('Standing Committee on Human Services',
'CX11', [u'HUM. S.', u'HUMAN S.']),
'CX11', [u'HUM. S.', u'HUMAN S.']),

('Standing Committee on Arts, Entertainment, Sports, Tourism, and Internet Media',
'CX37', [u'A.,E.,S.,T., & I.M.']),
'CX37', [u'A., E., S., T., & I.M.']),

('Standing Committee on Transportation',
'CX22', [u'TRANS.']),
'CX22', [u'TRANS.']),

('Standing Committee on Business, Professions and Consumer Protection',
'CX33', [u'B.,P. & C.P.', 'B., P. & C.P.', u'B. & P.']),
'CX33', [u'B., P., & C.P.', u'B. & P.']),

('Standing Committee on Water, Parks and Wildlife',
'CX24', [u'W., P. & W']),
'CX24', [u'W., P., & W.']),

('Standing Committee on Local Government',
'CX15', [u'L. GOV.', 'L. Gov.']),
'CX15', [u'L. GOV.', u'L. Gov.']),

('Standing Committee on Aging and Long Term Care',
'CX31', [u'AGING & L.T.C.']),
'CX31', [u'AGING & L.T.C.']),

('Standing Committee on Labor and Employment',
'CX14', [u'L. & E.']),
'CX14', [u'L. & E.']),

('Standing Committee on Governmental Organization',
'CX07', [u'G.O.']),
'CX07', [u'G.O.']),

('Standing Committee on Public Employees, Retirement and Social Security',
'CX17', [u'P.E., R. & S.S.']),
'CX17', [u'P.E., R., & S.S.']),

('Standing Committee on Veterans Affairs',
'CX38', [u'V.A.']),
'CX38', [u'V.A.']),

('Standing Committee on Housing and Community Development',
'CX10', [u'H. & C.D.']),
'CX10', [u'H. & C.D.']),

('Standing Committee on Environmental Safety and Toxic Materials',
'CX05', [u'E.S. & T.M.']),
'CX05', [u'E.S. & T.M.']),

('Standing Committee on Agriculture',
'CX01', [u'AGRI.']),
'CX01', [u'AGRI.']),

('Standing Committee on Banking and Finance',
'CX27', [u'B. & F.']),
'CX27', [u'B. & F.']),

('Standing Committee on Jobs, Economic Development and the Economy',
'CX34', [u'J., E.D. & E.']),
'CX34', [u'J., E.D., & E.']),

('Standing Committee on Accountability and Administrative Review',
'CX02', [u'A. & A.R.']),
'CX02', [u'A. & A.R.']),

('Standing Committee on Budget',
'CX29', [u'BUDGET.'])
]
'CX29', [u'BUDGET']),

('Standing Committee on Privacy and Consumer Protection',
'CX32', [u'P. & C.P.']),

('Extraordinary Committee on Finance',
'CX35', [u'FINANCE']),

('Extraordinary Committee on Public Health and Developmental Services',
'CX30', [u'P.H. & D.S.']),
]

committee_data_both = committee_data_upper + committee_data_lower

Expand All @@ -222,17 +233,20 @@ def get_committee_abbr_data():

committee_data = {'upper': _committee_abbr_to_name_upper,
'lower': _committee_abbr_to_name_lower}

return committee_data


def get_committee_name_regex():
# Builds a list of all committee abbreviations.
_committee_abbrs = map(operator.itemgetter(2), committee_data_both)
_committee_abbrs = itertools.chain.from_iterable(_committee_abbrs)
_committee_abbrs = sorted(_committee_abbrs, reverse=True, key=len)
_committee_abbrs = map(slugify, _committee_abbrs)
#_committee_abbrs = map(re.escape, _committee_abbrs)
_committee_abbr_regex = ['%s' % '[ .,]*'.join(list(abbr)) for abbr in _committee_abbrs]
_committee_abbr_regex = re.compile('Com\.\s+on\s+(%s)\.?' % '|'.join(_committee_abbr_regex))

_committee_abbr_regex = ['%s' % '[\s,]*'.join(abbr.replace(',', '')
.split(' ')) for abbr in _committee_abbrs]
_committee_abbr_regex = re.compile('(%s)' % '|'.join(_committee_abbr_regex))

return _committee_abbr_regex


Expand Down Expand Up @@ -451,7 +465,7 @@ def replacer(matchobj):
kwargs = attrs
matched_abbrs = committee_abbr_regex.findall(action.action)

if 'Com. on' in action.action and not matched_abbrs:
if re.search(r'Com[s]?. on', action.action) and not matched_abbrs:
msg = 'Failed to extract committee abbr from %r.'
self.logger.warning(msg % action.action)

Expand All @@ -460,13 +474,12 @@ def replacer(matchobj):
for abbr in matched_abbrs:
try:
name = self.committee_abbr_to_name(chamber, abbr)
committees.append(name)
except KeyError:
msg = ('Mapping contains no committee name for '
'abbreviation %r. Action text was %r.')
args = (abbr, action.action)
raise KeyError(msg % args)
else:
committees.append(name)

committees = filter(None, committees)
kwargs['committees'] = committees
Expand All @@ -478,17 +491,22 @@ def replacer(matchobj):

assert len(committees) == len(matched_abbrs)
for committee, abbr in zip(committees, matched_abbrs):
act_str = act_str.replace('Coms. on ', '')
act_str = act_str.replace('Com. on ' + abbr, committee)
act_str = act_str.replace(abbr, committee)
if not act_str.endswith('.'):
act_str = act_str + '.'

# Determine which chamber the action originated from.
changed = False
for string in ['upper', 'lower', 'joint']:
if actor.startswith(string):
actor = string
for committee_chamber in ['upper', 'lower', 'joint']:
if actor.startswith(committee_chamber):
actor = committee_chamber
changed = True
break
if not changed:
actor = 'other'

if actor != action.actor:
actor_info = kwargs.get('actor_info', {})
actor_info['details'] = action.actor
Expand Down
18 changes: 15 additions & 3 deletions openstates/de/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,12 +250,23 @@ def scrape_bill(self,link,chamber,session):
except requests.exceptions.HTTPError:
self.logger.warning("could not access vote document")
continue

vote_page = self.lxmlize(doc)
vote_info = vote_page.xpath(".//div[@id='page_content']/p")[-1]

try:
vote_info = vote_page.xpath('.//div[@id="page_content"]/p')[-1]
vote_tds = vote_page.xpath(".//table//td")
except IndexError:
vote_info = vote_page.xpath('.//form[1]')[0]
vote_tds = vote_page.xpath('.//table[@border="0"]//td')

yes_votes = []
no_votes = []
other_votes = []
other_votes = []

lines = vote_info.text_content().split("\n")
lines = filter(None, lines)

for line in lines:
if line.strip().startswith("Date"):
date_str = " ".join(line.split()[1:4])
Expand All @@ -279,8 +290,9 @@ def scrape_bill(self,link,chamber,session):
no_count = int(re.findall("No: (\d+)",line)[0])
other_count = int(re.findall("Not Voting: (\d+)",line)[0])
other_count += int(re.findall("Absent: (\d+)",line)[0])
vote_tds = vote_page.xpath(".//table//td")

person_seen = False

for td in vote_tds:
if person_seen:
person_vote = td.text_content().strip()
Expand Down
Loading

0 comments on commit 8ab12c9

Please sign in to comment.