Skip to content

Commit

Permalink
Merge pull request openstates#1125 from showerst/nm-mimetype
Browse files (browse the repository at this point in the history)
Added check for PDF mimetype instead of assuming text/html
  • Loading branch information
Andy Lo committed May 19, 2016
2 parents 1e94c2e + c1a7092 commit dd4cf5c
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions openstates/nm/bills.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -233,6 +233,8 @@ def check_docs(url, doc_type):
match = re.match('([A-Z]+)0*(\d{1,4})', fname)
if match:
bill_type, bill_num = match.groups()
mimetype = "application/pdf" if fname.lower().endswith("pdf") else "text/html"

if (chamber == "upper" and bill_type[0] == "S") or (chamber == "lower" and bill_type[0] == "H"):
bill_id = bill_type.replace('B', '') + bill_num
try:
Expand All @@ -242,9 +244,9 @@ def check_docs(url, doc_type):
else:
if doc_type == 'Final Version':
bill.add_version(
'Final Version', url + fname, mimetype='text/html')
'Final Version', url + fname, mimetype=mimetype)
else:
bill.add_document(doc_type, url + fname)
bill.add_document(doc_type, url + fname, mimetype=mimetype)

check_docs(firs_url, 'Fiscal Impact Report')
check_docs(lesc_url, 'LESC Analysis')
Expand Down Expand Up @@ -442,11 +444,13 @@ def scrape_documents(self, session, doctype, chamber, chamber_name=None):
except KeyError:
self.warning('document for unknown bill %s' % fname)
continue

mimetype = "application/pdf" if fname.lower().endswith("pdf") else "text/html"

# no suffix = just the bill
if suffix == '':
bill.add_version('introduced version', doc_path + fname,
mimetype='text/html')
mimetype=mimetype)

# floor amendments
elif re.match('F(S|H)\d', suffix):
Expand All @@ -459,7 +463,7 @@ def scrape_documents(self, session, doctype, chamber, chamber_name=None):
elif suffix.endswith('S'):
committee_name = suffix[:-1]
bill.add_version('%s substitute' % committee_name,
doc_path + fname, mimetype='text/html')
doc_path + fname, mimetype=mimetype)
# votes
elif 'SVOTE' in suffix:
vote = self.parse_senate_vote(doc_path + fname)
Expand All @@ -477,7 +481,7 @@ def scrape_documents(self, session, doctype, chamber, chamber_name=None):
elif re.match('\w{2,3,4}\d', suffix):
committee_name = re.match('[A-Z]+', suffix).group()
bill.add_document('%s committee report' % committee_name,
doc_path + fname)
doc_path + fname, mimetype=mimetype)

# ignore list, mostly typos reuploaded w/ proper name
elif suffix in ('HEC', 'HOVTE', 'GUI'):
Expand Down

0 comments on commit dd4cf5c

Please sign in to comment.