-
Notifications
You must be signed in to change notification settings - Fork 1
/
processData.py
47 lines (33 loc) · 1.24 KB
/
processData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/bin/env python
"""Compare province-name spellings across data sources and build donuts.csv.

Reads ``_data/industries.csv``, ``_data/capitals.csv`` and the TopoJSON file
``_geo/saudi_comp.json``, prints the province names found in each (handy for
spotting spelling mismatches), then attaches each capital's lat/lon to the
industry rows and writes the result to ``_data/donuts.csv``.

The ``print(...)`` calls all take a single argument so the script produces
the same output under Python 2 and Python 3.
"""
import json
from pprint import pprint

import pandas as pd


def shape_names(topology):
    """Return the ``id`` of every geometry under ``objects -> saudi``.

    Args:
        topology: the parsed JSON document (nested dicts/lists).

    Returns:
        A list of the geometry ids, in file order.
    """
    return [item['id'] for item in topology['objects']['saudi']['geometries']]


def build_donuts(industries, capitals):
    """Merge capital coordinates into the industry table.

    Args:
        industries: DataFrame with a ``region`` column plus ``%Saudi``,
            ``%Non-Saudi`` and ``'Total '`` (note the trailing space).
        capitals: DataFrame with ``province``, ``lat`` and ``lon`` columns.

    Returns:
        A new DataFrame keyed by ``province`` with ``lat``/``lon`` first, the
        two percentage columns removed and ``'Total '`` renamed to ``total``.
        Neither input frame is modified.
    """
    industries = industries.rename(columns={'region': 'province'})
    coords = capitals[['province', 'lat', 'lon']]
    # Inner join: a province whose spelling differs between the two tables
    # has no match and is silently dropped from the result.
    result = pd.merge(coords, industries, on=['province'], how='inner')
    # ``axis`` must be passed by keyword; the positional form was removed in
    # pandas 2.0.
    result = result.drop(['%Saudi', '%Non-Saudi'], axis=1)
    return result.rename(columns={'Total ': 'total'})


def main():
    """Print the three province-name lists, then write ``_data/donuts.csv``."""
    # IMPORTING DATA
    df = pd.read_csv("_data/industries.csv")
    df2 = pd.read_csv("_data/capitals.csv")
    with open('_geo/saudi_comp.json') as data_file:
        data = json.load(data_file)

    # Let's use df as the master spelling
    # ['Riyadh' 'Makkah' 'Eastern Province' 'Qassim' 'Madinah' 'Asir' 'Tabuk'
    # 'Hail' 'Northern Border' 'Jazab' 'Al Baha' 'Al Joaf' 'Najran']

    # FOR SLACK
    print("Province names as they appear in By Industry chart (industries.csv):")
    print(df['region'].unique())
    print("\n")

    print("Province names as they appear in By Province chart (capitals.csv):")
    print(df2['province'].unique())
    print("\n")

    print("Province names as they appear in the shapefiles:")
    print(shape_names(data))
    print("\n")

    # Merging capital lat/lon data into industries
    result = build_donuts(df, df2)
    result.to_csv('_data/donuts.csv', index=False)


if __name__ == "__main__":
    main()