From 670728a1b90c15d0f69ba3a2cd1c3eeedceded1a Mon Sep 17 00:00:00 2001 From: crdunwel Date: Wed, 1 Jun 2016 13:46:56 -0400 Subject: [PATCH] MN: handle bills that have yet to be transmitted to other chamber --- openstates/mn/bills.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/openstates/mn/bills.py b/openstates/mn/bills.py index 7d2427e446..7a4e61c08c 100644 --- a/openstates/mn/bills.py +++ b/openstates/mn/bills.py @@ -59,7 +59,6 @@ class MNBillScraper(BillScraper, LXMLMixin): ("Received from", "bill:introduced"), ) - def scrape(self, chamber, session): """ Scrape all bills for a given chamber and a given session. @@ -92,7 +91,6 @@ def scrape(self, chamber, session): for b in bills: self.get_bill_info(chamber, session, b['bill_url'], b['version_url']) - def get_full_bill_list(self, chamber, session): """ Uses the legislator search to get a full list of bills. Search page @@ -116,7 +114,6 @@ def get_full_bill_list(self, chamber, session): # bill: Range start-end (e.g. 1-10) url = BILL_SEARCH_URL % (search_chamber, search_session, start, start + stride, bill_type) - # Parse HTML html = self.get(url).text doc = lxml.html.fromstring(html) @@ -147,7 +144,6 @@ def get_full_bill_list(self, chamber, session): return bills - def get_bill_info(self, chamber, session, bill_detail_url, version_list_url): """ Extracts all the requested info for a given bill. @@ -160,6 +156,12 @@ def get_bill_info(self, chamber, session, bill_detail_url, version_list_url): # Get html and parse doc = self.lxmlize(bill_detail_url) + # Check if bill hasn't been transmitted to the other chamber yet + transmit_check = self.get_node(doc, '//h1[text()[contains(.,"Bills")]]/following-sibling::ul/li/text()') + if transmit_check is not None and 'has not been transmitted' in transmit_check.strip(): + self.logger.debug('Bill has not been transmitted to other chamber ... 
skipping {0}'.format(bill_detail_url)) + return + # Get the basic parts of the bill bill_id = self.get_node(doc, '//h1/text()') self.logger.debug(bill_id) @@ -208,7 +210,6 @@ def get_bill_info(self, chamber, session, bill_detail_url, version_list_url): self.save_bill(bill) - def get_bill_topics(self, chamber, session): """ Uses the leg search to map topics to bills. @@ -241,7 +242,6 @@ def get_bill_topics(self, chamber, session): bill = self.make_bill_id(bill) self._subject_mapping[bill].append(subject) - def extract_actions(self, bill, doc, current_chamber): """ Extract the actions taken on a bill. @@ -343,7 +343,6 @@ def extract_actions(self, bill, doc, current_chamber): return bill - def extract_sponsors(self, bill, doc, chamber): """ Extracts sponsors from bill page. @@ -362,7 +361,6 @@ def extract_sponsors(self, bill, doc, chamber): return bill - def extract_versions(self, bill, doc, chamber, version_list_url): """ Versions of a bill are on a separate page, linked to from the column @@ -384,7 +382,6 @@ def extract_versions(self, bill, doc, chamber, version_list_url): return bill - # def extract_vote_from_action(self, bill, action, chamber, action_row): # """ # Gets vote data. For the Senate, we can only get yes and no @@ -437,7 +434,6 @@ def make_bill_id(self, bill): return re.sub(r'(\w+?)0*(\d+)', r'\1 \2', bill) - def chamber_from_bill(self, bill): """ Given a bill id, determine chamber. @@ -447,28 +443,24 @@ def chamber_from_bill(self, bill): return 'lower' if bill.lower().startswith('hf') else 'upper' - def other_chamber(self, chamber): """ Given a chamber, get the other. """ return 'lower' if chamber == 'upper' else 'upper' - def search_chamber(self, chamber): """ Given chamber, like lower, make into MN site friendly search chamber. """ return { 'lower':'House', 'upper':'Senate' }[chamber] - def search_session(self, session): """ Given session ID, make into MN site friendly search. 
""" return self.metadata['session_details'][session]['site_id'] - def is_testing(self): """ Determine if this is test mode.