From c913ccc6b520af66925f095f0a469b35d658c894 Mon Sep 17 00:00:00 2001
From: Andy Lo <alo@sunlightfoundation.com>
Date: Sat, 23 Jul 2016 01:08:31 -0400
Subject: [PATCH 1/3] CA: Added committee abbreviations to recognized list.

---
 openstates/ca/bills.py | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/openstates/ca/bills.py b/openstates/ca/bills.py
index 647f57aad7..0eb5f660a6 100644
--- a/openstates/ca/bills.py
+++ b/openstates/ca/bills.py
@@ -14,11 +14,11 @@
 from .models import CABill
 from .actions import CACategorizer
 
-
 SPONSOR_TYPES = {'LEAD_AUTHOR': 'primary',
                  'COAUTHOR': 'cosponsor',
                  'PRINCIPAL_COAUTHOR': 'primary'}
 
+
 def clean_title(s):
     # replace smart quote characters
     s = s.replace(u'\xe2\u20ac\u201c', '-')
@@ -38,7 +38,6 @@ def clean_title(s):
 
 # Committee codes used in action chamber text.
 committee_data_upper = [
-    #('CZ09',  'Standing Committee on Floor Analyses'),
     ('Standing Committee on Governance and Finance',
       'CS73', [u'Gov. & F.']),
 
@@ -104,13 +103,15 @@ def clean_title(s):
 
     ('Standing Committee on Rules',
       'CS58', [u'RLS.']),
-    ]
+
+    ('Extraordinary Committee on Transportation and Infrastructure Development',
+        'CS67', [r'T. & I.D.']),
+]
 
 committee_data_lower = [
-    # LOWER
     ('Standing Committee on Rules',
       'CX20', [u'RLS.']),
-    #('assembly floor analysis', 'CZ01', []),
+
     ('Standing Committee on Revenue and Taxation',
       'CX19', [u'REV. & TAX']),
 
@@ -195,8 +196,17 @@ def clean_title(s):
       'CX02', [u'A. & A.R.']),
 
     ('Standing Committee on Budget',
-      'CX29', [u'BUDGET.'])
-    ]
+      'CX29', [u'BUDGET.']),
+
+    ('Standing Committee on Privacy and Consumer Protection',
+        'CX32', [u'P. & C.P.']),
+
+    ('Extraordinary Committee on Finance',
+        'CX35', [u'FINANCE']),
+
+    ('Extraordinary Committee on Public Health and Developmental Services',
+        'CX30', [u'P.H. & D.S.']),
+]
 
 committee_data_both = committee_data_upper + committee_data_lower
 
@@ -226,13 +236,15 @@ def get_committee_abbr_data():
 
 
 def get_committee_name_regex():
+    # Builds a list of all committee abbreviations.
     _committee_abbrs = map(operator.itemgetter(2), committee_data_both)
     _committee_abbrs = itertools.chain.from_iterable(_committee_abbrs)
     _committee_abbrs = sorted(_committee_abbrs, reverse=True, key=len)
     _committee_abbrs = map(slugify, _committee_abbrs)
-    #_committee_abbrs = map(re.escape, _committee_abbrs)
+
     _committee_abbr_regex = ['%s' % '[ .,]*'.join(list(abbr)) for abbr in _committee_abbrs]
     _committee_abbr_regex = re.compile('Com\.\s+on\s+(%s)\.?' % '|'.join(_committee_abbr_regex))
+
     return _committee_abbr_regex
 
 

From 09cd33dfc97c76cbf73698f2e2b4dc55ccf69169 Mon Sep 17 00:00:00 2001
From: Andy Lo <alo@sunlightfoundation.com>
Date: Sat, 23 Jul 2016 04:11:30 -0400
Subject: [PATCH 2/3] CA: Improved committee abbreviation parsing.

Now supports detecting multiple committee abbreviations in bill actions.
---
 openstates/ca/bills.py | 122 +++++++++++++++++++++--------------------
 1 file changed, 63 insertions(+), 59 deletions(-)

diff --git a/openstates/ca/bills.py b/openstates/ca/bills.py
index 0eb5f660a6..750b3f9890 100644
--- a/openstates/ca/bills.py
+++ b/openstates/ca/bills.py
@@ -39,164 +39,165 @@ def clean_title(s):
 # Committee codes used in action chamber text.
 committee_data_upper = [
     ('Standing Committee on Governance and Finance',
-      'CS73', [u'Gov. & F.']),
+        'CS73', [u'GOV. & F.', u'Gov. & F.']),
 
     ('Standing Committee on Energy, Utilities and Communications',
-      'CS71', [u'E. U. & C.', u'E., U. & C', 'E., U., & C.']),
+        'CS71', [u'E., U., & C.']),
 
     ('Standing Committee on Education',
-      'CS44', [u'ED.']),
+        'CS44', [u'ED.']),
 
     ('Standing Committee on Appropriations',
-      'CS61', [u'APPR.']),
+        'CS61', [u'APPR.']),
 
     ('Standing Committee on Labor and Industrial Relations',
-      'CS51', [u'L. & I.R.']),
+        'CS51', [u'L. & I.R.']),
 
     ('Standing Committee on Elections and Constitutional Amendments',
-      'CS45', [u'E. & C.A.']),
+        'CS45', [u'E. & C.A.']),
 
     ('Standing Committee on Environmental Quality',
-      'CS64', [u'E.Q.']),
+        'CS64', [u'E.Q.']),
 
     ('Standing Committee on Natural Resources And Water',
-      'CS55', [u'N.R. & W.']),
+        'CS55', [u'N.R. & W.']),
 
     ('Standing Committee on Public Employment and Retirement',
-      'CS56', [u'P.E. & R.']),
+        'CS56', [u'P.E. & R.']),
 
     ('Standing Committee on Governmental Organization',
-      'CS48', [u'G.O.']),
+        'CS48', [u'G.O.']),
 
     ('Standing Committee on Insurance',
-      'CS70', [u'INS.']),
+        'CS70', [u'INS.']),
 
     ('Standing Committee on Public Safety',
-      'CS72', [u'PUB. S.']),
+        'CS72', [u'PUB. S.']),
 
     ('Standing Committee on Judiciary',
-      'CS53', [u'JUD.']),
+        'CS53', [u'JUD.']),
 
     ('Standing Committee on Health',
-      'CS60', [u'HEALTH.']),
+        'CS60', [u'HEALTH']),
 
     ('Standing Committee on Transportation and Housing',
-      'CS59', [u'T. & H.']),
+        'CS59', [u'T. & H.']),
 
     ('Standing Committee on Business, Professions and Economic Development',
-      'CS42', [u'B., P. & E.D.']),
+        'CS42', [u'B., P. & E.D.']),
 
     ('Standing Committee on Agriculture',
-      'CS40', [u'AGRI.']),
+        'CS40', [u'AGRI.']),
 
     ('Standing Committee on Banking and Financial Institutions',
-      'CS69', [u'B. & F.I.']),
+        'CS69', [u'B. & F.I.']),
 
     ('Standing Committee on Veterans Affairs',
-      'CS66', [u'V.A.']),
+        'CS66', [u'V.A.']),
 
     ('Standing Committee on Budget and Fiscal Review',
-      'CS62', [u'B. & F.R.']),
+        'CS62', [u'B. & F.R.']),
 
     ('Standing Committee on Human Services',
-      'CS74', [u'HUM. S.', u'HUMAN S.']),
+        'CS74', [u'HUM. S.', u'HUMAN S.']),
 
     ('Standing Committee on Rules',
-      'CS58', [u'RLS.']),
+        'CS58', [u'RLS.']),
 
     ('Extraordinary Committee on Transportation and Infrastructure Development',
-        'CS67', [r'T. & I.D.']),
+        'CS67', [u'T. & I.D.']),
 ]
 
 committee_data_lower = [
     ('Standing Committee on Rules',
-      'CX20', [u'RLS.']),
+        'CX20', [u'RLS.']),
 
     ('Standing Committee on Revenue and Taxation',
-      'CX19', [u'REV. & TAX']),
+        'CX19', [u'REV. & TAX']),
 
     ('Standing Committee on Natural Resources',
-      'CX16', [u'NAT. RES.']),
+        'CX16', [u'NAT. RES.']),
 
     ('Standing Committee on Appropriations',
-      'CX25', [u'APPR.']),
+        'CX25', [u'APPR.']),
 
     ('Standing Committee on Insurance',
-      'CX28', ['INS.']),
+        'CX28', [u'INS.']),
 
     ('Standing Committee on Utilities and Commerce',
-      'CX23', [u'U. & C.']),
+        'CX23', [u'U. & C.']),
 
     ('Standing Committee on Education',
-      'CX03', [u'ED.']),
+        'CX03', [u'ED.']),
 
     ('Standing Committee on Public Safety',
-      'CX18', [u'PUB. S.']),
+        'CX18', [u'PUB. S.']),
 
     ('Standing Committee on Elections and Redistricting',
-      'CX04', [u'E. & R.']),
+        'CX04', [u'E. & R.']),
 
     ('Standing Committee on Judiciary',
-      'CX13', [u'JUD.', 'Jud.']),
+        'CX13', [u'JUD.']),
+
     ('Standing Committee on Higher Education',
-      'CX09', [u'HIGHER ED.']),
+        'CX09', [u'HIGHER ED.']),
 
     ('Standing Committee on Health',
-      'CX08', [u'HEALTH']),
+        'CX08', [u'HEALTH']),
 
     ('Standing Committee on Human Services',
-      'CX11', [u'HUM. S.', u'HUMAN S.']),
+        'CX11', [u'HUM. S.', u'HUMAN S.']),
 
     ('Standing Committee on Arts, Entertainment, Sports, Tourism, and Internet Media',
-      'CX37', [u'A.,E.,S.,T., & I.M.']),
+        'CX37', [u'A., E., S., T., & I.M.']),
 
     ('Standing Committee on Transportation',
-      'CX22', [u'TRANS.']),
+        'CX22', [u'TRANS.']),
 
     ('Standing Committee on Business, Professions and Consumer Protection',
-      'CX33', [u'B.,P. & C.P.', 'B., P. & C.P.', u'B. & P.']),
+        'CX33', [u'B., P., & C.P.', u'B. & P.']),
 
     ('Standing Committee on Water, Parks and Wildlife',
-      'CX24', [u'W., P. & W']),
+        'CX24', [u'W., P., & W.']),
 
     ('Standing Committee on Local Government',
-      'CX15', [u'L. GOV.', 'L. Gov.']),
+        'CX15', [u'L. GOV.', u'L. Gov.']),
 
     ('Standing Committee on Aging and Long Term Care',
-      'CX31', [u'AGING & L.T.C.']),
+        'CX31', [u'AGING & L.T.C.']),
 
     ('Standing Committee on Labor and Employment',
-      'CX14', [u'L. & E.']),
+        'CX14', [u'L. & E.']),
 
     ('Standing Committee on Governmental Organization',
-      'CX07', [u'G.O.']),
+        'CX07', [u'G.O.']),
 
     ('Standing Committee on Public Employees, Retirement and Social Security',
-      'CX17', [u'P.E., R. & S.S.']),
+        'CX17', [u'P.E., R., & S.S.']),
 
     ('Standing Committee on Veterans Affairs',
-      'CX38', [u'V.A.']),
+        'CX38', [u'V.A.']),
 
     ('Standing Committee on Housing and Community Development',
-      'CX10', [u'H. & C.D.']),
+        'CX10', [u'H. & C.D.']),
 
     ('Standing Committee on Environmental Safety and Toxic Materials',
-      'CX05', [u'E.S. & T.M.']),
+        'CX05', [u'E.S. & T.M.']),
 
     ('Standing Committee on Agriculture',
-      'CX01', [u'AGRI.']),
+        'CX01', [u'AGRI.']),
 
     ('Standing Committee on Banking and Finance',
-      'CX27', [u'B. & F.']),
+        'CX27', [u'B. & F.']),
 
     ('Standing Committee on Jobs, Economic Development and the Economy',
-      'CX34', [u'J., E.D. & E.']),
+        'CX34', [u'J., E.D., & E.']),
 
     ('Standing Committee on Accountability and Administrative Review',
-      'CX02', [u'A. & A.R.']),
+        'CX02', [u'A. & A.R.']),
 
     ('Standing Committee on Budget',
-      'CX29', [u'BUDGET.']),
+        'CX29', [u'BUDGET']),
 
     ('Standing Committee on Privacy and Consumer Protection',
         'CX32', [u'P. & C.P.']),
@@ -232,6 +233,7 @@ def get_committee_abbr_data():
 
     committee_data = {'upper': _committee_abbr_to_name_upper,
                       'lower': _committee_abbr_to_name_lower}
+
     return committee_data
 
 
@@ -240,10 +242,10 @@ def get_committee_name_regex():
     _committee_abbrs = map(operator.itemgetter(2), committee_data_both)
     _committee_abbrs = itertools.chain.from_iterable(_committee_abbrs)
     _committee_abbrs = sorted(_committee_abbrs, reverse=True, key=len)
-    _committee_abbrs = map(slugify, _committee_abbrs)
 
-    _committee_abbr_regex = ['%s' % '[ .,]*'.join(list(abbr)) for abbr in _committee_abbrs]
-    _committee_abbr_regex = re.compile('Com\.\s+on\s+(%s)\.?' % '|'.join(_committee_abbr_regex))
+    _committee_abbr_regex = ['%s' % '[\s,]*'.join(abbr.replace(',', '')
+        .split(' ')) for abbr in _committee_abbrs]
+    _committee_abbr_regex = re.compile('(%s)' % '|'.join(_committee_abbr_regex))
 
     return _committee_abbr_regex
 
@@ -463,7 +465,7 @@ def replacer(matchobj):
                 kwargs = attrs
                 matched_abbrs = committee_abbr_regex.findall(action.action)
 
-                if 'Com. on' in action.action and not matched_abbrs:
+                if re.search(r'Com[s]?. on', action.action) and not matched_abbrs:
                     msg = 'Failed to extract committee abbr from %r.'
                     self.logger.warning(msg % action.action)
 
@@ -472,13 +474,12 @@ def replacer(matchobj):
                     for abbr in matched_abbrs:
                         try:
                             name = self.committee_abbr_to_name(chamber, abbr)
+                            committees.append(name)
                         except KeyError:
                             msg = ('Mapping contains no committee name for '
                                    'abbreviation %r. Action text was %r.')
                             args = (abbr, action.action)
                             raise KeyError(msg % args)
-                        else:
-                            committees.append(name)
 
                     committees = filter(None, committees)
                     kwargs['committees'] = committees
@@ -490,8 +491,11 @@ def replacer(matchobj):
 
                     assert len(committees) == len(matched_abbrs)
                     for committee, abbr in zip(committees, matched_abbrs):
+                        act_str = act_str.replace('Coms. on ', '')
                         act_str = act_str.replace('Com. on ' + abbr, committee)
                         act_str = act_str.replace(abbr, committee)
+                        if not act_str.endswith('.'):
+                            act_str = act_str + '.'
 
                 changed = False
                 for string in ['upper', 'lower', 'joint']:

From 86ba91bdd5e314b986c8ba959722052dd972ed72 Mon Sep 17 00:00:00 2001
From: Andy Lo <alo@sunlightfoundation.com>
Date: Sat, 23 Jul 2016 12:05:55 -0400
Subject: [PATCH 3/3] Changed variable name to be more descriptive.

---
 openstates/ca/bills.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/openstates/ca/bills.py b/openstates/ca/bills.py
index 750b3f9890..416bae747b 100644
--- a/openstates/ca/bills.py
+++ b/openstates/ca/bills.py
@@ -497,14 +497,16 @@ def replacer(matchobj):
                         if not act_str.endswith('.'):
                             act_str = act_str + '.'
 
+                # Determine which chamber the action originated from.
                 changed = False
-                for string in ['upper', 'lower', 'joint']:
-                    if actor.startswith(string):
-                        actor = string
+                for committee_chamber in ['upper', 'lower', 'joint']:
+                    if actor.startswith(committee_chamber):
+                        actor = committee_chamber
                         changed = True
                         break
                 if not changed:
                     actor = 'other'
+
                 if actor != action.actor:
                     actor_info = kwargs.get('actor_info', {})
                     actor_info['details'] = action.actor