#!/usr/bin/env python
#######################################
#Script developed by Oscar Castillo
#University of Florida
#2022
#Contact email: [email protected]
#######################################
import os
import sys
from osgeo import gdal
from osgeo.gdalconst import GA_ReadOnly
import numpy
import pandas as pd
from datetime import datetime, timedelta
import joblib
import requests
from dateutil.relativedelta import relativedelta

# Register all of the GDAL drivers.
gdal.AllRegister()
##### Download CHIRPS data #####
# Corrected (final) data
def get_correc_nc(dt_s, dt_e, out_cor_nc):
    s = requests.Session()
    s.mount("https://data.chc.ucsb.edu", requests.adapters.HTTPAdapter(max_retries=10))
    diff_month = (dt_e.year - dt_s.year) * 12 + (dt_e.month - dt_s.month)
    for n in range(diff_month + 1):
        yymm = dt_s + relativedelta(months=+n)
        yy = yymm.strftime("%Y")
        mm = yymm.strftime("%m")
        try:
            # Monthly files
            response = s.get('https://data.chc.ucsb.edu/products/CHIRPS-2.0/global_daily/netcdf/p05/by_month/chirps-v2.0.'
                             + yy + '.' + mm + '.days_p05.nc', timeout=80)
            # Yearly files (alternative endpoint)
            # response = s.get('https://data.chc.ucsb.edu/products/CHIRPS-2.0/global_daily/netcdf/p05/chirps-v2.0.'
            #                  + dt_st_y + '.days_p05.nc', timeout=80)
            response.raise_for_status()  # Raises HTTPError on 4xx/5xx (e.g., a month not yet published).
            # Create the target directory if it doesn't exist.
            os.makedirs(out_cor_nc, exist_ok=True)
            with open(out_cor_nc + '/corr_chirps_' + yy + mm + '.nc', 'wb') as f:
                f.write(response.content)
            print('corr_chirps_' + yy + mm + '.nc file downloaded.')
            # open(out_cor_nc + '/corr_chirps_' + dt_st_y + '.nc', 'wb').write(response.content)
            # print('corr_chirps_' + dt_st_y + '.nc file downloaded.')
        except requests.exceptions.HTTPError as err:
            print(err)  # Skip missing months instead of aborting the whole download.
            # raise SystemExit(err)
        response.close()  # Close the connection with the server.
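# Example call (illustrative dates and folder, not part of the pipeline above):
# fetch the corrected monthly files covering January-March 2021.
# get_correc_nc(datetime(2021, 1, 1), datetime(2021, 3, 1), 'chirps_corr_nc')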
# Preliminary data
def get_prelim_nc(dt_s, dt_e, out_pre_nc):
    s = requests.Session()
    s.mount("https://data.chc.ucsb.edu", requests.adapters.HTTPAdapter(max_retries=10))
    for y in range(dt_e.year - dt_s.year + 1):
        single_y = str(dt_s.year + y)
        try:
            response = s.get('https://data.chc.ucsb.edu/products/CHIRPS-2.0/prelim/global_daily/fixed/netcdf/chirps-v2.0.'
                             + single_y + '.days_p05.nc', timeout=80)
            response.raise_for_status()
            # Create the target directory if it doesn't exist.
            os.makedirs(out_pre_nc, exist_ok=True)
            with open(out_pre_nc + '/prelim_nc_' + single_y + '.nc', 'wb') as f:
                f.write(response.content)
            print('prelim_nc_' + single_y + '.nc file downloaded.')
        except requests.exceptions.HTTPError as err:
            print(err)  # Skip missing years instead of aborting the whole download.
            # raise SystemExit(err)
        response.close()  # Close the connection with the server.
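# Example call (illustrative): preliminary data comes in yearly files, so the same
# date range as above needs only the 2021 file.
# get_prelim_nc(datetime(2021, 1, 1), datetime(2021, 3, 1), 'chirps_prelim_nc')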
# Read the corrected and preliminary extractions and merge them: corrected values
# are kept through their last available date, and preliminary values fill in from
# the following day onward.
def precpkl(outdir_prec):
    df1 = joblib.load(outdir_prec + '/prec_corr.pkl')
    df2 = joblib.load(outdir_prec + '/prec_prelim.pkl')
    last_dt_corr = df1.columns[-1]  # Columns are '%Y%j' date strings in order, so the last is the newest.
    dt_s_pre = datetime.strptime(last_dt_corr, '%Y%j') + timedelta(days=1)
    dt_st_p = dt_s_pre.strftime('%Y%j')
    df3 = df2.loc[:, dt_st_p:]  # Preliminary columns from the day after the last corrected date.
    result = pd.concat([df1, df3], axis=1)
    joblib.dump(result, outdir_prec + '/prec.pkl')
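# Example call (illustrative): assumes prec_corr.pkl and prec_prelim.pkl already
# exist in the folder, e.g. written by chirps1()/chirps2() below.
# precpkl('prec_out')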
# Intended for long time series but few points (<10000).
def chirps1(in_file, in_nc_dir, outprec_file):
    nc_lst = os.listdir(in_nc_dir)  # List all files in the input folder.
    nc_lst.sort()  # File names encode dates, so sorting puts them in chronological order.
    df_chirps = pd.DataFrame()
    # Read the input CSV file (expected columns, in order: ID, Latitude, Longitude).
    with open(in_file, "r") as f1:
        in_pt = [line for line in f1.readlines() if line.strip()]
    for row in in_pt[1:]:  # Skip the header row.
        pt_id = int(row.split(',')[0])
        lat = float(row.split(',')[1])
        lon = float(row.split(',')[2])
        time_lst = []
        precval = []
        for nc_file in nc_lst:
            if nc_file.endswith(".nc"):
                # Open the NetCDF file.
                dsi = gdal.Open(in_nc_dir + "/" + nc_file, GA_ReadOnly)
                if dsi is None:
                    print('Could not open NetCDF file')
                    sys.exit(1)
                meta_nc = dsi.GetMetadata()  # Get the metadata of the file.
                # The origin date of the time axis (for CHIRPS, '1980-1-1 0:0:0').
                date_start = meta_nc['time#units'][-14:]
                datetime_st = datetime.strptime(date_start, '%Y-%m-%d %H:%M:%S')
                # Band timestamps as day offsets from the origin; "[1:-1]" strips the enclosing braces.
                bands_time = meta_nc['NETCDF_DIM_time_VALUES'][1:-1].split(',')
                bands_time = list(map(int, bands_time))  # Convert the strings to integers.
                # Invert the geotransform to convert lon/lat to pixel/line indices.
                gt = dsi.GetGeoTransform()
                px = int((lon - gt[0]) / gt[1])
                py = int((lat - gt[3]) / gt[5])
                bands = dsi.RasterCount
                for i in range(1, bands + 1):
                    d = dsi.GetRasterBand(i).ReadAsArray(px, py, 1, 1)  # Read a single pixel.
                    dt_st = datetime_st + timedelta(days=bands_time[i - 1])  # Band timestamp as a date.
                    band_t = dt_st.strftime('%Y%j')  # strftime() reformats the date to the DSSAT '%Y%j' format.
                    time_lst.append(band_t)
                    if d is None:  # The point falls outside the raster extent.
                        d = numpy.float32([[-9999.0]])
                    precval.append(d[0][0])
        df_chirps[pt_id] = precval
    df_chirps = df_chirps.T  # Transpose so each row is a point.
    df_chirps.index.name = 'ID'  # Set the index name.
    df_chirps.columns = time_lst  # Set column names to the actual dates.
    os.makedirs(os.path.dirname(outprec_file), exist_ok=True)
    joblib.dump(df_chirps, outprec_file)
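# Example call (illustrative file names): points.csv has a header row and columns
# in the order ID,Latitude,Longitude (note that chirps2 below instead reads the
# columns by name, with Longitude before Latitude).
# chirps1('points.csv', 'chirps_corr_nc', 'prec_out/prec_corr.pkl')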
# Intended for short time series (<2 years) but many points. It requires more available RAM,
# since every band of every file is held in memory at once.
def chirps2(in_file, in_nc_dir, outprec_file):
    nc_lst = os.listdir(in_nc_dir)  # List all files in the input folder.
    nc_lst.sort()  # File names encode dates, so sorting puts them in chronological order.
    data = {}
    time_lst = []
    start2 = datetime.now()
    # Loop through the files (dates).
    for nc_file in nc_lst:
        if nc_file.endswith(".nc"):
            start3 = datetime.now()
            # Open the NetCDF file.
            dsi = gdal.Open(in_nc_dir + "/" + nc_file, GA_ReadOnly)
            print(nc_file)
            if dsi is None:
                print('Could not open NetCDF file')
                sys.exit(1)
            meta_nc = dsi.GetMetadata()  # Get the metadata of the file.
            date_start = meta_nc['time#units'][-14:]  # The origin date of the time axis (for CHIRPS, '1980-1-1 0:0:0').
            datetime_st = datetime.strptime(date_start, '%Y-%m-%d %H:%M:%S')
            bands_time = meta_nc['NETCDF_DIM_time_VALUES'][1:-1].split(',')  # Band timestamps; "[1:-1]" strips the enclosing braces.
            bands_time = list(map(int, bands_time))  # Convert the strings to integers.
            # Geotransform (all CHIRPS files share the same grid, so the last one read is reused below).
            gt = dsi.GetGeoTransform()
            # Get the image size.
            rowsY = dsi.RasterYSize
            colsX = dsi.RasterXSize
            bands = dsi.RasterCount
            for i in range(1, bands + 1):
                dt_st = datetime_st + timedelta(days=bands_time[i - 1])  # Band timestamp as a date.
                band_t = dt_st.strftime('%Y%j')  # strftime() reformats the date to the DSSAT '%Y%j' format.
                data[band_t] = dsi.GetRasterBand(i).ReadAsArray(0, 0, colsX, rowsY)  # Whole raster held in memory.
                time_lst.append(band_t)
            end3 = datetime.now()
            print("Time of execution for", nc_file, "is:", str(end3 - start3))
    # Read the input CSV file (expected columns: ID, Longitude, Latitude) as arrays.
    pt = pd.read_csv(in_file)
    pt_id = pt['ID'].to_numpy()
    lon = pt['Longitude'].to_numpy()
    lat = pt['Latitude'].to_numpy()
    # Invert the geotransform to convert lon/lat to pixel/line indices for all points at once.
    px = ((lon - gt[0]) / gt[1]).astype(int)
    py = ((lat - gt[3]) / gt[5]).astype(int)
    df_chirps = pd.DataFrame()
    df_chirps['ID'] = pt_id
    df_chirps = df_chirps.set_index('ID')
    for t in time_lst:
        prec_val = data[t][py, px]  # Fancy indexing extracts all points for one date in one step.
        df_chirps[t] = prec_val
    os.makedirs(os.path.dirname(outprec_file), exist_ok=True)
    joblib.dump(df_chirps, outprec_file)
    end2 = datetime.now()
    print("Time of execution for reading the netCDF files: ", str(end2 - start2))