forked from KamelMoohamed/DISCOVER-DSCOVR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
86 lines (68 loc) · 2.92 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from datetime import datetime
import requests
import os
import gzip
import netCDF4
import numpy as np
import pandas as pd
import tensorflow as tf
def fetch_data(start_date_string, timeout=60):
    """Download, convert and cache the DSCOVR magnetometer file for one date.

    Nothing is done when ``downloads/processed/<YYYYMMDD>.csv`` already
    exists. Otherwise the function queries the NOAA DSCOVR file index,
    downloads and gunzips the ``mg1`` NetCDF file listed for the date, keeps
    every 92nd sample of the six GSE/GSM field components, builds sliding
    windows with ``sequence_creation``, runs them through the Keras model
    stored at ``models/mapping_mag.h5`` and writes the predictions to the CSV.

    Args:
        start_date_string: Date formatted as ``"YYYY-MM-DD"``,
            e.g. ``"2021-10-05"``.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.exceptions.RequestException: A request failed or the file
            download returned an error status.
        RuntimeError: The file index answered with a non-200 status.
        ValueError: The file index response was not valid JSON.
        KeyError: The index lists no ``mg1`` file for the requested date.
    """
    print(start_date_string)
    modified_string = start_date_string.replace("-", "")
    processed_path = "downloads/processed/" + modified_string + ".csv"
    if os.path.exists(processed_path):
        # Already processed by an earlier call.
        return
    print(processed_path)

    # NOTE(review): strptime yields a naive datetime, so timestamp() is
    # interpreted in the machine's local timezone - confirm that is intended.
    start_date_obj = datetime.strptime(start_date_string, '%Y-%m-%d')
    start_timestamp = int(start_date_obj.timestamp()) * 1000
    # NOTE(review): start_timestamp is in milliseconds, so this offset spans
    # roughly 2000 days; 172799999 (two days minus 1 ms) may have been
    # intended. Left unchanged - confirm against the API before editing.
    end_timestamp = start_timestamp + 172799999000
    api_url = (
        "https://www.ngdc.noaa.gov/dscovr-data-access/files?start_date="
        + str(start_timestamp) + "&end_date=" + str(end_timestamp)
    )

    # Report the failure as before, but stop here instead of continuing with
    # an unbound ``data`` variable.
    try:
        response = requests.get(api_url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
        raise
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        raise RuntimeError(
            f"DSCOVR file index request failed with status "
            f"{response.status_code}"
        )
    try:
        data = response.json()
    except ValueError as e:
        print(f"JSON decoding error: {e}")
        raise

    try:
        file_url = data[modified_string]["mg1"]
    except KeyError as e:
        raise KeyError(
            f"no 'mg1' file listed for {modified_string}"
        ) from e

    raw_dir = os.path.join("downloads", "raw_data")
    os.makedirs(raw_dir, exist_ok=True)
    os.makedirs(os.path.dirname(processed_path), exist_ok=True)
    input_path = os.path.join(raw_dir, modified_string + ".nc.gz")
    output_path = os.path.join(raw_dir, modified_string + ".nc")

    response = requests.get(file_url, timeout=timeout)
    # Do not write an HTTP error page to disk as if it were the archive.
    response.raise_for_status()
    with open(input_path, "wb") as archive_file:
        archive_file.write(response.content)
    with gzip.open(input_path, 'rb') as compressed_file, \
            open(output_path, 'wb') as extracted_file:
        extracted_file.write(compressed_file.read())

    dsc_cols = ['bx_gse', 'by_gse', 'bz_gse', 'bx_gsm', 'by_gsm', 'bz_gsm']
    # The context manager closes the NetCDF handle once the arrays are copied.
    with netCDF4.Dataset(output_path) as data_nc:
        arrays = {
            # Keep every 92nd sample of each component.
            f'dsc_{col}': np.array(data_nc.variables[col][::92])
            for col in dsc_cols
        }
    data_df = pd.DataFrame(arrays)

    processed_data = sequence_creation(data_df.values)
    model = tf.keras.models.load_model("models/mapping_mag.h5")
    mapped_data = model.predict(processed_data)
    # predict() returns a NumPy array, which has no to_csv(); wrap it first.
    # Assumes the model output is at most 2-D - TODO confirm.
    pd.DataFrame(mapped_data).to_csv(processed_path, index=False)
    # The raw .nc.gz and .nc files are left in downloads/raw_data.
def forcast_data(data, model, size=100):
    """Roll ``model`` forward ``size`` steps, feeding predictions back in.

    Each step wraps the current window in a batch of one, takes the first
    element of the model's output as the next point, stores it, then drops
    the window's oldest row and appends the new point.

    Args:
        data: 2-D array ``(window_length, n_features)`` used as the seed
            window. The caller's array is not modified.
        model: Callable that takes the batched window and returns an
            indexable result whose first element is the next point.
        size: Number of points to generate.

    Returns:
        ``numpy.ndarray`` of shape ``(size, n_features)`` with the generated
        points in order.
    """
    n_features = data.shape[1]
    forecast = np.zeros((size, n_features))
    window = data
    for step in range(size):
        batch = np.array([window])
        next_point = model(batch)[0]
        forecast[step] = next_point
        # Flatten both parts, join them, then restore the row layout.
        shifted = np.concatenate(
            (np.ravel(window[1:]), np.ravel(next_point))
        )
        window = shifted.reshape(-1, n_features)
    return forecast
def sequence_creation(data, num_features=6, sequence_length=10):
    """Cut ``data`` into overlapping windows that advance one row at a time.

    Args:
        data: Sliceable sequence of rows, such as a 2-D array.
        num_features: Size of the last axis of the result.
        sequence_length: Number of consecutive rows in each window.

    Returns:
        ``numpy.ndarray`` of shape ``(-1, sequence_length, num_features)``;
        one window per valid start row, none when ``data`` is shorter than
        ``sequence_length``.
    """
    window_count = len(data) - sequence_length + 1
    windows = [
        data[start:start + sequence_length] for start in range(window_count)
    ]
    return np.array(windows).reshape(-1, sequence_length, num_features)