This repository has been archived by the owner on Feb 27, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
data_exploration_2017.py
81 lines (65 loc) · 2.2 KB
/
data_exploration_2017.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
# Input CSV files grouped by day type; within each list the 2017 file comes
# first and the 2018 file second.
sats = [
    '2017_1_Saturday.csv',
    '2018_1_Saturday.csv',
]
suns = [
    '2017_1_Sunday.csv',
    '2018_1_Sunday.csv',
]
weeks = [
    '2017_1_Weekday.csv',
    '2018_1_Weekday.csv',
]
def clean_value(text):
    """Convert one raw CSV cell into an integer.

    The cell is stringified, tab characters and commas (thousands
    separators) are removed, and surrounding whitespace is stripped.
    A lone ``-`` is read as zero; blank cells and missing values
    (``None`` / NaN) are read as zero as well instead of raising.
    Fractional values are truncated toward zero.

    Args:
        text: The raw cell value (string, number, or missing value).

    Returns:
        The cell's value as an ``int``.

    Raises:
        ValueError: If the cleaned text cannot be parsed as a number.
    """
    if text is None:
        return 0
    text = str(text).replace('\t', '').replace(',', '').strip()
    # pandas hands empty cells over as NaN, whose string form is 'nan';
    # int(float('nan')) would raise, so treat it like the '-' placeholder.
    if text in ('', '-') or text.lower() == 'nan':
        return 0
    # Go through float first so inputs such as '12.0' are accepted.
    return int(float(text))
def open_csv(filename, size=47):
    """Load a CSV file and return its leading square block as integers.

    Args:
        filename: Path of the CSV file to read; its first column becomes
            the index of the resulting frame.
        size: Number of leading rows and columns to keep. Defaults to 47,
            the value this script has always used.

    Returns:
        A DataFrame with at most ``size`` rows and ``size`` columns in
        which every cell has been passed through ``clean_value``.
    """
    with open(filename) as f:
        data = pd.read_csv(f, index_col=0)
    data = data.iloc[0:size, 0:size]
    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map. Check the class (not the instance) so a column that
    # happens to be called 'map' cannot shadow the method lookup.
    if hasattr(pd.DataFrame, 'map'):
        return data.map(clean_value)
    return data.applymap(clean_value)
# Parse every input file up front; each resulting list of tables is in the
# same order as the corresponding list of filenames.
sat_dfs = list(map(open_csv, sats))
sun_dfs = list(map(open_csv, suns))
week_dfs = list(map(open_csv, weeks))
def _compare_last_rows(dfs):
    """Build the 2017-vs-2018 comparison table for one pair of tables.

    ``dfs[0]`` is used as the 2017 table and ``dfs[1]`` as the 2018 table.
    The last row of each is taken and transposed into a one-column frame.
    The 2018 values are then expressed as a percentage of the 2017 values,
    and the 2017 column is set to a constant 100.

    Returns:
        A ``(last_row_2017, last_row_2018, comparison)`` tuple, where
        ``comparison`` has the columns ``'2017'`` and ``'2018'``.
    """
    base = dfs[0].iloc[[-1]].transpose()
    later = dfs[1].iloc[[-1]].transpose()
    comparison = base.copy()
    comparison.columns = ['2017']
    # Divide before overwriting the 2017 column so the ratio uses raw values.
    comparison['2018'] = later.divide(base)*100
    comparison['2017'] = 100
    return base, later, comparison


#saturday 2017 vs 2018 comparison
jan17_sat_entry, jan18_sat_entry, sat_entry = _compare_last_rows(sat_dfs)
#sunday 2017 vs 2018 comparison
jan17_sun_entry, jan18_sun_entry, sun_entry = _compare_last_rows(sun_dfs)
#weekday 2017 vs 2018 comparison
jan17_week_entry, jan18_week_entry, week_entry = _compare_last_rows(week_dfs)
def _save_entry_plot(entries, title, filename):
    """Plot one comparison table and save it to ``filename``.

    A fresh figure is opened for every call and closed after saving.
    Without that, successive ``plt.plot`` calls draw onto the same implicit
    figure, so each saved image would also contain the lines of every plot
    drawn before it.

    Args:
        entries: Comparison table; it is transposed before plotting.
        title: Title shown above the plot.
        filename: Path of the image file to write.
    """
    plt.figure()
    plt.plot(entries.transpose())
    plt.title(title)
    plt.ylabel('Normalized Ridership (%)')
    plt.ylim((50, 120))
    plt.xlabel('Year')
    plt.savefig(filename)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close()


_save_entry_plot(sat_entry, 'Saturday: Stations as entries',
                 'saturday_2017v2018.png')
_save_entry_plot(sun_entry, 'Sunday: Stations as entries',
                 'sunday_2017v2018.png')
_save_entry_plot(week_entry, 'Weekdays: Stations as entries',
                 'weekdays_2017v2018.png')
#print(sat_dfs)
#print(sun_dfs)
#print(week_dfs)