From 9668464d8c9fb88e715bb8552b374ac2a93be28c Mon Sep 17 00:00:00 2001 From: James Turk Date: Tue, 16 Apr 2013 17:04:57 -0400 Subject: [PATCH 1/5] validate votes in OK --- openstates/ok/bills.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openstates/ok/bills.py b/openstates/ok/bills.py index 72649f048d..a6efa58336 100644 --- a/openstates/ok/bills.py +++ b/openstates/ok/bills.py @@ -227,6 +227,7 @@ def scrape_votes(self, bill, url): for name in votes['other']: vote.other(name) + vote.validate() bill.add_vote(vote) def scrape_subjects(self, chamber, session): From 3b9e68b5a30752a7b27597dc7e9319b1434ef801 Mon Sep 17 00:00:00 2001 From: Paul Tagliamonte Date: Wed, 17 Apr 2013 09:41:08 -0400 Subject: [PATCH 2/5] NM: Bills: Tweak the name regex --- openstates/nm/bills.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openstates/nm/bills.py b/openstates/nm/bills.py index f21114f092..a4b6a27228 100644 --- a/openstates/nm/bills.py +++ b/openstates/nm/bills.py @@ -517,7 +517,7 @@ def parse_senate_vote(self, url): # pull votes out matches = re.match( - ' ([A-Z,.]+)(\s+)X\s+([A-Z,.]+)(\s+)X', line) + ' ([A-Z,\'\-.]+)(\s+)X\s+([A-Z,\'\-.]+)(\s+)X', line) if matches is not None: matches = matches.groups() From 682737248b86efc42b3485d1795c536b98551fb6 Mon Sep 17 00:00:00 2001 From: James Turk Date: Wed, 17 Apr 2013 09:54:09 -0400 Subject: [PATCH 3/5] KS: vote fix --- openstates/ks/bills.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/openstates/ks/bills.py b/openstates/ks/bills.py index bd3044314d..a38049b81b 100644 --- a/openstates/ks/bills.py +++ b/openstates/ks/bills.py @@ -164,7 +164,8 @@ def parse_vote(self, bill, vote_date, vote_chamber, vote_status, vote_url): os.remove(vote_doc) - comma_or_and = re.compile(', (?!Sr.|Jr.)|\sand\s') + comma_or_and = re.compile(', |\sand\s') + comma_or_and_jrsr = re.compile(', (?!Sr.|Jr.)|\sand\s') vote = None passed = True @@ -188,7 +189,12 @@ def parse_vote(self, bill, vote_date, vote_chamber, vote_status, vote_url): vote.yes(member) elif vote and line.startswith('Nays:'): line = line.split(':', 1)[1].strip() - for member in comma_or_and.split(line): + # slightly different vote format if Jr stands alone on a line + if ', Jr.,' in line: + regex = comma_or_and_jrsr + else: + regex = comma_or_and + for member in regex.split(line): if member != 'None.': vote.no(member) elif vote and line.startswith('Present '): From a2cb939e8c2cb8367720f73f414d8963418c4cf3 Mon Sep 17 00:00:00 2001 From: James Turk Date: Wed, 17 Apr 2013 10:33:54 -0400 Subject: [PATCH 4/5] OK: vote fixes --- openstates/ok/bills.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/openstates/ok/bills.py b/openstates/ok/bills.py index a6efa58336..e8c9e184d3 100644 --- a/openstates/ok/bills.py +++ b/openstates/ok/bills.py @@ -178,25 +178,28 @@ def scrape_votes(self, bill, url): counts = collections.defaultdict(int) votes = collections.defaultdict(list) + seen_yes = False + for sib in header.xpath("following-sibling::p")[13:]: line = sib.xpath("string()").replace('\r\n', ' ').strip() if "*****" in line: break match = re.match( - r'(YEAS|NAYS|EXCUSED|VACANT|CONSTITUTIONAL PRIVILEGE|NOT VOTING)\s*:\s*(\d+)', + r'(YEAS|NAYS|EXCUSED|VACANT|CONSTITUTIONAL PRIVILEGE|NOT VOTING|N/V)\s*:\s*(\d+)', line) if match: - if match.group(1) == 'YEAS': + if match.group(1) == 'YEAS' and 'RCS#' not in line: vtype = 'yes' - elif match.group(1) == 'NAYS': + seen_yes = True + elif match.group(1) == 'NAYS' and seen_yes: vtype = 'no' elif match.group(1) == 'VACANT': continue # skip these - else: + elif seen_yes: vtype = 'other' counts[vtype] += int(match.group(2)) - else: + elif seen_yes: for name in line.split(' '): if not name: continue @@ -217,12 +220,11 @@ def scrape_votes(self, bill, url): vote.add_source(url) - if ':' in name: - raise Exception(name) - for name in votes['yes']: vote.yes(name) for name in votes['no']: + if ':' in name: + raise Exception(name) vote.no(name) for name in votes['other']: vote.other(name) From ab104d6ea04cc5ce745794f194871385cd1d9938 Mon Sep 17 00:00:00 2001 From: James Turk Date: Wed, 17 Apr 2013 10:34:56 -0400 Subject: [PATCH 5/5] MN: fix version url --- openstates/mn/bills.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openstates/mn/bills.py b/openstates/mn/bills.py index 81b20ac149..05caa56050 100644 --- a/openstates/mn/bills.py +++ b/openstates/mn/bills.py @@ -11,7 +11,7 @@ BILL_DETAIL_URL_BASE = 'https://www.revisor.mn.gov/revisor/pages/search_status/' # The versions of a bill use a different base URL. -VERSION_URL_BASE = "https://www.revisor.mn.gov" +VERSION_URL_BASE = "https://www.revisor.mn.gov/bills/" class MNBillScraper(BillScraper): jurisdiction = 'mn'