forked from unitedstates/congress-legislators
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsenate_contacts.py
executable file
·167 lines (126 loc) · 5.37 KB
/
senate_contacts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#!/usr/bin/env python
# Update current senators' websites, addresses, and related contact details from www.senate.gov.
import lxml.etree, io
import string, re
from datetime import datetime
import utils
from utils import download, load_data, save_data, parse_date
def _xml_text(node, path):
    """Return the XPath string value of *path* under *node*, coerced to a plain ``str``."""
    return str(node.xpath("string(%s)" % path))


def _find_member(bioguide, by_name, bioguide_id, member_full):
    """Look up a legislator record by bioguide ID, falling back to "Last (P-ST)" name.

    Returns the matching record dict, or ``None`` when neither index has it.
    """
    member = bioguide.get(bioguide_id)
    if member is None:
        member = by_name.get(member_full)
    return member


def run():
    """Refresh current senators' records from the Senate's public XML feeds.

    Loads ``legislators-current.yaml``, then:

    1. From ``senators_cfm.xml``: sets each matching senator's official full
       name and bioguide ID, and updates the latest term's class, party, URL,
       address, office and phone. Entries whose latest term is not current,
       is not a Senate term, or is for a different state are skipped.
    2. From ``cvc_member_data.xml``: sets the LIS ID and the latest term's
       state rank (senior/junior).

    The updated data is written back to ``legislators-current.yaml``.
    Downloads are forced unless the ``cache`` flag is set.

    Raises:
        SystemExit: with status 1 when a feed entry matches no known
            legislator by bioguide ID or by "Last (P-ST)" name. The YAML
            file is not saved in that case.
        KeyError: when the contact feed carries a class or party code that
            is not in the lookup tables below.
    """
    today = datetime.now().date()

    # default to not caching
    cache = utils.flags().get('cache', False)
    force = not cache

    y = load_data("legislators-current.yaml")

    # Map bioguide IDs to dicts. Reference the same dicts
    # in y so we are updating y when we update bioguide.
    bioguide = {}
    by_name = {}
    for m in y:
        if "bioguide" in m["id"]:
            bioguide[m["id"]["bioguide"]] = m

        # The loops below tolerate members with no terms, so a term-less
        # record must not crash index construction; it just cannot be
        # keyed by party and state.
        if not m["terms"]:
            continue
        last_term = m["terms"][-1]
        party = last_term["party"][0]
        state = last_term["state"]
        last_name = m["name"]["last"]
        member_full = "%s (%s-%s)" % (last_name, party, state)
        by_name[member_full] = m

    # Feed codes -> values stored in the YAML. Unknown codes raise KeyError.
    senate_classes = {"Class I": 1, "Class II": 2, "Class III": 3}
    party_names = {"D": "Democrat", "R": "Republican", "I": "Independent", "ID": "Independent"}

    print("Fetching general Senate information from senators_cfm.xml...")
    url = "http://www.senate.gov/general/contact_information/senators_cfm.xml"
    body = download(url, "legislators/senate.xml", force)
    # file has an <?xml declaration and so must be parsed as a bytes array
    dom = lxml.etree.parse(io.BytesIO(body.encode("utf8")))
    for node in dom.xpath("member"):
        bioguide_id = _xml_text(node, "bioguide_id").strip()
        member_full = _xml_text(node, "member_full")

        if bioguide_id == "":
            print("Someone has an empty bioguide ID!")
            print(lxml.etree.tostring(node))
            continue

        print("[%s] Processing Senator %s..." % (bioguide_id, member_full))

        # find member record in our YAML, either by bioguide_id or member_full
        member = _find_member(bioguide, by_name, bioguide_id, member_full)
        if member is None:
            print("Bioguide ID '%s' and full name '%s' not recognized." % (bioguide_id, member_full))
            # Abort with a failure status: nothing has been saved, and the
            # site-provided exit() helper is intended for interactive use only.
            raise SystemExit(1)

        try:
            term = member["terms"][-1]
        except IndexError:
            print("Member has no terms", bioguide_id, member_full)
            continue

        if today < parse_date(term["start"]) or today > parse_date(term["end"]):
            print("Member's last listed term is not current", bioguide_id, member_full, term["start"])
            continue

        if term["type"] != "sen":
            print("Member's last listed term is not a Senate term", bioguide_id, member_full)
            continue

        if term["state"] != _xml_text(node, "state"):
            print("Member's last listed term has the wrong state", bioguide_id, member_full)
            continue

        if "district" in term:
            del term["district"]

        # The feed puts a suffix after the first name ("First, Jr."); move it
        # to the end: "First Last, Jr.".
        full_name = _xml_text(node, "first_name")
        suffix = None
        if ", " in full_name:
            # maxsplit=1 so a value containing several ", " cannot break the
            # two-name unpacking with a ValueError.
            full_name, suffix = full_name.split(", ", 1)
        full_name += " " + _xml_text(node, "last_name")
        if suffix:
            full_name += ", " + suffix
        member["name"]["official_full"] = full_name

        member["id"]["bioguide"] = bioguide_id

        term["class"] = senate_classes[_xml_text(node, "class")]
        term["party"] = party_names[_xml_text(node, "party")]

        website = _xml_text(node, "website").strip()
        # kill trailing slashes and force hostname to lowercase since around December 2013 they started uppercasing "Senate.gov"
        website = re.sub("/$", "", website).replace(".Senate.gov", ".senate.gov")
        # temporary home pages for new senators (values starting with "/") are not stored
        if not website.startswith("/"):
            term["url"] = website

        term["address"] = _xml_text(node, "address").strip().replace("\n ", " ")
        # The office is the part of the address before " WASHINGTON ".
        term["office"] = string.capwords(term["address"].upper().split(" WASHINGTON ")[0])

        phone = _xml_text(node, "phone").strip()
        # "(202) 224-0000" -> "202-224-0000"
        term["phone"] = phone.replace("(", "").replace(")", "").replace(" ", "-")

        #contact_form = str(node.xpath("string(email)")).strip().replace(".Senate.gov", ".senate.gov")
        #if contact_form: # can be blank
        #  term["contact_form"] = contact_form

    print("\n\nUpdating Senate stateRank and LIS ID from cvc_member_data.xml...")
    url = "http://www.senate.gov/legislative/LIS_MEMBER/cvc_member_data.xml"
    body = download(url, "legislators/senate_cvc.xml", force)
    dom = lxml.etree.parse(io.StringIO(body))
    for node in dom.getroot():
        if node.tag == "lastUpdate":
            # list(node) replaces the deprecated node.getchildren().
            updated_date, updated_time = list(node)
            print("Last updated: %s, %s" % (updated_date.text, updated_time.text))
            continue

        bioguide_id = _xml_text(node, "bioguideId").strip()
        if bioguide_id == "":
            print("Someone has an empty bioguide ID!")
            print(lxml.etree.tostring(node))
            continue

        # Synthesize the same "Last (P-ST)" key used by the by_name index.
        last_name = _xml_text(node, "name/last")
        party = _xml_text(node, "party")
        state = _xml_text(node, "state")
        member_full = "%s (%s-%s)" % (last_name, party, state)

        print("[%s] Processing Senator %s..." % (bioguide_id, member_full))

        # find member record in our YAML, either by bioguide_id or member_full
        member = _find_member(bioguide, by_name, bioguide_id, member_full)
        if member is None:
            print("Bioguide ID '%s' and synthesized official name '%s' not recognized." % (bioguide_id, member_full))
            # Same failure exit as in the contact-feed loop above.
            raise SystemExit(1)

        try:
            term = member["terms"][-1]
        except IndexError:
            print("Member has no terms", bioguide_id, member_full)
            continue

        member.setdefault("id", {})["lis"] = node.attrib["lis_member_id"]

        # Any other stateRank value leaves term["state_rank"] untouched.
        state_rank = _xml_text(node, "stateRank")
        if state_rank == '1':
            term["state_rank"] = "senior"
        elif state_rank == '2':
            term["state_rank"] = "junior"

    print("Saving data...")
    save_data(y, "legislators-current.yaml")
# Script entry point: perform the update only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    run()