-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain-api.py
141 lines (109 loc) · 4.57 KB
/
main-api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#%%
import json, re, os, pandas as pd
from shutil import rmtree
## Functions
def unique_vals(series):
    """Return the unique values of a pandas Series as a plain Python list.

    (Was a lambda assigned to a name — PEP 8 E731 recommends a ``def``.)
    """
    return series.unique().tolist()
def sanitize_data(data):
    """Slug-ify *data* for use in API file paths.

    Lower-cases the string, turns every run of non-letter characters into a
    single '-', and drops a single trailing '-'. A leading '-' is kept,
    matching the original behaviour (e.g. ' USA' -> '-usa').
    """
    # Raw strings: '\-' in a plain string literal is an invalid escape
    # sequence (SyntaxWarning on Python 3.12+). The regexes are unchanged
    # ('\-' and '-' match the same character).
    data = re.sub(r'-+', '-', re.sub(r'[^A-Za-z]', '-', data.lower()))
    # Strip one trailing separator so generated file names don't end in '-'.
    if len(data) > 0:
        data = data[:-1] if data[-1] == '-' else data
    return data
def export(data, api):
    """Write *data* (a DataFrame) to ``api/<api>.csv`` and ``api/<api>.json``.

    Each format is attempted independently; errors are printed and swallowed
    so one failed endpoint does not abort the whole build.
    """
    print(f'> Exporting: {api}')
    # Export to CSV
    try:
        data.to_csv(f'api/{api}.csv', index=False)
    except Exception as e:
        print('[Error] - ', e)
    # Export to JSON. The loads/dumps round-trip re-serialises with json's
    # default separators — kept so the published files stay byte-identical.
    try:
        records = data.to_json(orient='records')
        # 'with' closes the handle even if the write raises; the original
        # open()/close() pair leaked the descriptor on error.
        with open(f'api/{api}.json', 'w', encoding='utf-8') as f:
            f.write(json.dumps(json.loads(records)))
    except Exception as e:
        print('[Error] - ', e)
### CASES
# Build a single dataframe from 3 types of datasets
# JHU CSSE publishes one wide time-series CSV per metric; each is melted to
# long form, augmented with per-day deltas, exported, and kept for the merge
# further below.
dfs = {}
for t in ['confirmed', 'deaths', 'recovered']:
    # Get one type of dataset
    df = pd.read_csv(f'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_{t}_global.csv')
    # Reshape! This is why I'm not using CSSEGISandData's data directly.
    # Wide -> long: one row per (location, date); missing states become ''.
    df = df.melt(id_vars=['Province/State', 'Country/Region', 'Lat', 'Long']).fillna('')
    df.rename(columns={
        'Province/State':'state',
        'Country/Region': 'country',
        'Lat': 'lat',
        'Long': 'long',
        'variable': 'date',
        'value': t
    }, inplace=True)
    # Normalise the date headers to ISO 'YYYY-MM-DD' strings.
    df['date'] = pd.to_datetime(df['date'])
    df['date'] = df['date'].dt.strftime('%Y-%m-%d')
    # Slug-ify location names so they are safe to use in API file paths.
    df['country'] = df['country'].apply(sanitize_data)
    df['state'] = df['state'].apply(sanitize_data)
    df[t] = df[t].apply(lambda v: int(v))
    # Per-location daily delta: today's cumulative total minus yesterday's.
    df_grouped = df.groupby(['country', 'state', 'lat', 'long'])
    new_dfs = []
    for k,grouped_df in df_grouped:
        grouped_df = grouped_df.reset_index()
        # shift(1) pairs each row with the previous date; fill_value=0 makes
        # the first day's delta equal its cumulative count.
        grouped_df.loc[:,f'{t}_new'] = grouped_df[t] - grouped_df[t].shift(1, fill_value=0)
        new_dfs.append(grouped_df)
    df = pd.concat(new_dfs)
    # reset_index() above added an 'index' column; drop it before export.
    if 'index' in df.columns:
        del df['index']
    export(data=df, api=f'cases/{t}')
    dfs[t] = df
# Merge the three metrics into one frame keyed by location and date.
joinCols = ['state', 'country', 'lat', 'long', 'date']
# Aggregations shared by every groupby below (all counts are summed).
groupByCols = {'confirmed':'sum', 'deaths':'sum', 'recovered':'sum', 'confirmed_new':'sum', 'deaths_new':'sum', 'recovered_new':'sum'}
# Left joins keep every 'confirmed' row; metrics missing on the right side
# are filled with 0.
df = pd.merge(dfs['confirmed'], dfs['deaths'], how='left', on=joinCols)
df = pd.merge(df, dfs['recovered'], how='left', on=joinCols).fillna(0)
df = df.loc[:,['date', 'country', 'state', 'lat', 'long', 'confirmed', 'confirmed_new', 'deaths', 'deaths_new', 'recovered', 'recovered_new']]
# Get latest date
# NOTE(review): assumes the last row carries the newest date — true for the
# per-location ordering produced above, but worth confirming if the pipeline
# changes.
today = df['date'].iloc[-1]
# Everything
export(data=df, api='cases/all')
# Global Level
df_global = df.groupby(['date']).agg(groupByCols).reset_index()
export(data=df_global, api='cases/global')
# Country Level
df_country = df.groupby(['date','country']).agg(groupByCols).reset_index()
export(data=df_country, api='cases/country')
### COUNTRIES
for country in unique_vals(df['country']):
    # Remove US data. Will be created later.
    if country != 'us':
        # Export country data (one row per date/state/location).
        df_tmp_country = df[df['country'] == country]
        df_tmp_country_main = df_tmp_country.groupby(['date', 'country', 'state', 'lat', 'long']).agg(groupByCols).reset_index()
        export(data=df_tmp_country_main, api=f'country/{country}')
        # Export state data: recreate the per-country directory from scratch.
        if os.path.isdir(f'api/country/{country}'):
            rmtree(f'api/country/{country}')
        os.mkdir(f'api/country/{country}')
        for state in unique_vals(df_tmp_country['state']):
            # Filter on the ORIGINAL state value. The old code substituted
            # the country name into `state` *before* filtering, so countries
            # without states (state == '') matched no rows and exported an
            # empty dataset.
            df_tmp_state = df_tmp_country[df_tmp_country['state'] == state]
            # An empty state means country-level rows; name the file after
            # the country instead.
            file_name = state if state else country
            export(data=df_tmp_state, api=f'country/{country}/{file_name}')
### DATE
# One dataset per calendar day, across all locations.
for day in unique_vals(df['date']):
    export(data=df[df['date'] == day], api=f'date/{day}')
### DIMENSIONS
# Lookup tables: one file per dimension, plus the location list as of the
# latest date.
countries_states = df.loc[df['date'] == today, ['country', 'state', 'lat', 'long']]
for dimension in ('country', 'state', 'date'):
    export(data=pd.DataFrame({dimension: unique_vals(df[dimension])}), api=f'dimensions/{dimension}')
export(data=countries_states, api='dimensions/countries_states')
#%%
# ## STATS - WIP
# TODO - Create datasets for everything based on charts required
# stats = {}
# # Now
# stats['now'] = {}
# stats['now']['global'] = df_global[df_global['date'] == today]
# stats['now']['countries'] = df_country[df_country['date'] == today]
# # N days ago
# for Date in df['date'].unique().tolist():
# df_date = df[df['date'] == Date]