-
Notifications
You must be signed in to change notification settings - Fork 7
/
benchmark.py
110 lines (93 loc) · 3.84 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os
import re
import glob
import sys
import numpy as np
import pandas as pd
import xarray as xr
import xesmf as xe
import salem
# ---------------
# setup choice
use_china_measurements = True
use_openaq = False
countries = ['CN']
parameters = {
'pm25': 'PM2_5_DRY',
'o3': 'o3'
}
# ---------------
# data
path = os.getcwd()
wrfout_file = sys.argv[1]
year = re.findall(r'wrfout_d01_\d+', wrfout_file)[0][-4:]
month = re.findall(r'wrfout_d01_\d+-\d+', wrfout_file)[0][-2:]
grid_rectilinear_global = xr.Dataset(
{
"lat": (["lat"], np.arange(-60, 85, 0.25)),
"lon": (["lon"], np.arange(-180, 180, 0.25)),
}
)
# ---------------
# processing
if use_china_measurements:
obs_source = 'china_measurements'
if year in ['2014', '2015', '2016', '2017', '2020']:
obs_filename = f'/nobackup/WRFChem/measurements/china_measurements_corrected/df_obs_summary_{year}.csv'
elif use_openaq:
obs_source = 'openaq'
if year in ['2013', '2014', '2015', '2016', '2017', '2020']:
obs_filename = f'/nobackup/WRFChem/openaq/csv/openaq_data_{year}_noduplicates.csv'
elif year in ['2018', '2019']:
if int(month) < 7:
obs_filename = f'/nobackup/WRFChem/openaq/csv/openaq_data_{year}_part1_noduplicates.csv'
else:
obs_filename = f'/nobackup/WRFChem/openaq/csv/openaq_data_{year}_part2_noduplicates.csv'
df_obs = pd.read_csv(obs_filename, index_col="date.utc", parse_dates=True)
ds_wrfout = salem.open_wrf_dataset(wrfout_file)
year_month_day = str(ds_wrfout.time.values)[2:12]
hour = str(ds_wrfout.time.values)[13:15]
df_obs_dt = df_obs[year_month_day].loc[df_obs[year_month_day].index.hour == int(hour)]
for country in countries:
df_obs_dt_country = df_obs_dt.loc[df_obs_dt.country == country]
for parameter in parameters.keys():
ds_wrfout_parameter = ds_wrfout[parameters[parameter]].isel(bottom_top=0)
have_weights = len(glob.glob(f'{path}/bilinear*')) != 0
regridder = xe.Regridder(ds_wrfout_parameter, grid_rectilinear_global, "bilinear", reuse_weights=have_weights)
ds_wrfout_parameter_regrid = regridder(ds_wrfout_parameter)
df_obs_dt_country_parameter = df_obs_dt_country.loc[df_obs_dt_country.parameter == parameter]
df_obs_dt_country_parameter.replace(-999, np.nan, inplace=True)
datetimes = []
lats = []
lons = []
obs_values = []
wrf_values = []
for index, obs in df_obs_dt_country_parameter.iterrows():
lat = obs['coordinates.latitude']
lon = obs['coordinates.longitude']
if parameter == 'o3': # convert units to ppb
if 'g/m' in obs.unit:
obs_value = obs.value / 1.9957
elif 'ppm' in obs.unit:
obs_value = obs.value * 1000
wrf_value = ds_wrfout_parameter_regrid.sel(lat=lat, lon=lon, method='nearest').values[0] * 1000
else:
obs_value = obs.value
wrf_value = ds_wrfout_parameter_regrid.sel(lat=lat, lon=lon, method='nearest').values[0]
datetimes.append(np.datetime64(f'{year_month_day}T{hour}:00:00'))
lats.append(lat)
lons.append(lon)
obs_values.append(obs_value)
wrf_values.append(wrf_value)
df_values = pd.DataFrame.from_dict({
'datetime': datetimes,
'lat': np.array(lats),
'lon': np.array(lons),
'obs_value': np.array(obs_values),
'wrf_value': np.array(wrf_values)
})
df_values.set_index('datetime', inplace=True)
df_values.loc[df_values.obs_value <= 0, 'obs_value'] = np.nan
df_values.loc[df_values.wrf_value <= 0, 'wrf_value'] = np.nan
df_values.dropna(inplace=True)
df_values.to_csv(f'{path}/df_values_{obs_source}_{year_month_day}-{hour}_{parameter}_{country}.csv')