-
Notifications
You must be signed in to change notification settings - Fork 153
/
congress.py
55 lines (45 loc) · 2.21 KB
/
congress.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
''' Use k-means to locate voting clusters in the U.S. Congress.
Data set: Senate voting record on 28 passed bills in 2016 (114th Congress)
Source: https://www.govtrack.us/congress/votes#chamber[]=1&category[]=3
'''
from typing import Tuple, List, Dict, DefaultDict, NamedTuple
from kmeans import k_means, assign_data
from collections import defaultdict, Counter
import csv
import glob
# Numeric encoding of a single vote; the vote_value table below maps each
# recorded vote string ('Nay', 'Not Voting', 'Yea') to one of these values.
VoteValue = float
class Senator(NamedTuple):
    """Immutable, hashable identity of one senator.

    Instances serve as dictionary keys while votes are accumulated, so the
    three fields together decide which voting record a CSV row belongs to.
    """

    name: str   # senator's name, taken from the name field of a CSV row
    party: str  # party label, taken from the party field of a CSV row
    state: str  # state, taken from the state field of a CSV row
# Load votes arranged by topic and accumulate votes by senator.
#
# Every CSV file under congress_data/ is read as: one topic row, one header
# row, then one row per senator.  Each senator's vote is appended to that
# senator's list in file order, so (assuming every senator appears in every
# file) position i of every list refers to the same vote.
vote_value = {'Nay': -1, 'Not Voting': 0, 'Yea': 1}   # type: Dict[str, VoteValue]
accumulated_record = defaultdict(list)   # type: DefaultDict[Senator, List[VoteValue]]
for filename in glob.glob('congress_data/*.csv'):
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; without it, quoted fields that contain
    # newlines can be parsed incorrectly.
    with open(filename, newline='') as f:
        reader = csv.reader(f)
        vote_topic = next(reader)   # first row of the file; read to skip it
        headers = next(reader)      # second row (column names); read to skip it
        for person, state, district, vote, name, party in reader:
            senator = Senator(name, party, state)
            # A vote string missing from vote_value raises KeyError here.
            accumulated_record[senator].append(vote_value[vote])
# Freeze each senator's accumulated vote list into a tuple.  Tuples are
# hashable, which lets a whole voting pattern be used as a dictionary key
# later in the script.
record = {
    member: tuple(ballots)
    for member, ballots in accumulated_record.items()
}   # type: Dict[Senator, Tuple[VoteValue, ...]]
# Cluster the voting patterns: k_means picks 3 centroids over 50 iterations,
# then assign_data groups every pattern under its nearest centroid.
# The same dict view is passed to both calls; a view can be iterated
# repeatedly, so both calls see every senator's pattern.
vote_patterns = record.values()
centroids = k_means(vote_patterns, k=3, iterations=50)
clustered_votes = assign_data(centroids, vote_patterns)
# Invert `record`: map each distinct voting pattern to every senator who
# voted exactly that way (several senators may share one pattern).
votes_to_senators = defaultdict(list)   # type: DefaultDict[Tuple[VoteValue, ...], List[Senator]]
for member, pattern in record.items():
    votes_to_senators[pattern].append(member)

# Sanity check: the clusters together must hold one pattern per senator,
# i.e. 100 in total.
assert sum(len(group) for group in clustered_votes.values()) == 100
# Report each cluster: a banner, one line per member senator, then a tally
# of members by party, followed by a blank separator line.
for i, votes_in_cluster in enumerate(clustered_votes.values(), start=1):
    print(f'=========== Voting Cluster #{i} ===========')

    # A pattern shared by several senators can occur more than once in the
    # cluster; set() visits it once so those senators are not listed twice.
    members = []   # type: List[Senator]
    for pattern in set(votes_in_cluster):
        members.extend(votes_to_senators[pattern])

    for member in members:
        print(member)

    party_totals = Counter(member.party for member in members)   # type: Counter
    print(party_totals)
    print()