Skip to content

Commit

Permalink
Merge pull request #17 from metno/6-add-aeronetsda-reading
Browse files Browse the repository at this point in the history
6 add aeronetsda reading
  • Loading branch information
jgriesfeller authored Dec 19, 2023
2 parents 1bc56e0 + 4459ea4 commit 3c721ed
Show file tree
Hide file tree
Showing 8 changed files with 10,503 additions and 214 deletions.
37 changes: 34 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,19 @@ implementations of readers for the pyaerocom project using pyaro as interface
This will install pyaro and pyaro-readers and all their dependencies.

## Supported readers
* aeronetsunreader
Reader for aeronet sun version 3 data.
### aeronetsunreader
Reader for aeronet sun version 3 data (https://aeronet.gsfc.nasa.gov/new_web/download_all_v3_aod.html).
The reader supports reading from an uncompressed local file and from an URL providing a zip file or an
uncompressed file.
If a zip file URL is provided, only the 1st file in there is used (since the
Aeronet provided zip contains all data in a single file)
Aeronet provided zip contains all data in a single file).

### aeronetsdareader
Reader for aeronet SDA version 3 data (https://aeronet.gsfc.nasa.gov/new_web/download_all_v3_aod.html).
The reader supports reading from an uncompressed local file and from an URL providing a zip file, an
uncompressed file or a tar file (including all common compression formats).
If a zip file URL is provided, only the 1st file in there is used (since the
Aeronet provided zip contains all data in a single file).

## Usage
### aeronetsunreader
Expand All @@ -38,6 +45,30 @@ ts.data('AOD_550nm')['altitudes']
ts.data('AOD_550nm')['values']

```
### aeronetsdareader
```python
import pyaro.timeseries
TEST_URL = "https://pyaerocom.met.no/pyaro-suppl/testdata/SDA_Level20_Daily_V3_testdata.tar.gz"
engine = pyaro.list_timeseries_engines()["aeronetsdareader"]
ts = engine.open(TEST_URL, filters=[], fill_country_flag=False)
print(ts.variables())
# stations
ts.data('AODGT1_550nm')['stations']
# start_times
ts.data('AODGT1_550nm')['start_times']
# stop_times
ts.data('AODGT1_550nm')['stop_times']
# latitudes
ts.data('AODGT1_550nm')['latitudes']
# longitudes
ts.data('AODGT1_550nm')['longitudes']
# altitudes
ts.data('AODGT1_550nm')['altitudes']
# values
ts.data('AODGT1_550nm')['values']

```

### geocoder_reverse_natural_earth
geocoder_reverse_natural_earth is small helper to identify country codes for obs networks that don't mention the
countrycode of a station in their location data
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "pyaro_readers"
version = "0.0.1"
version = "0.0.2"
authors = [{ name = "MET Norway" }]
description = "implementations of pyaerocom reading plugings using pyaro as interface'"
readme = "README.md"
Expand All @@ -31,7 +31,7 @@ dependencies = [
]

[tool.setuptools]
packages = ["pyaro_readers.aeronetsunreader", "geocoder_reverse_natural_earth"]
packages = ["pyaro_readers.aeronetsunreader", "pyaro_readers.aeronetsdareader","geocoder_reverse_natural_earth"]
package-dir = {"" = "src"}

[tool.setuptools.package-data]
Expand All @@ -43,6 +43,7 @@ geocoder_reverse_natural_earth = ["*.zip"]

[project.entry-points."pyaro.timeseries"]
"aeronetsunreader" = "pyaro_readers.aeronetsunreader:AeronetSunTimeseriesEngine"
"aeronetsdareader" = "pyaro_readers.aeronetsdareader:AeronetSdaTimeseriesEngine"

[tool.mypy]
python_version = "3.9"
Expand Down
310 changes: 310 additions & 0 deletions src/pyaro_readers/aeronetsdareader/AeronetSdaTimeseriesReader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
import csv
import tarfile
from fnmatch import fnmatch
from io import BytesIO
from urllib.parse import urlparse
from urllib.request import urlopen
from zipfile import BadZipFile, ZipFile

import numpy as np
import requests
from pyaro.timeseries import (
AutoFilterReaderEngine,
Data,
Flag,
NpStructuredData,
Station,
)
from tqdm import tqdm

from geocoder_reverse_natural_earth import (
Geocoder_Reverse_Exception,
Geocoder_Reverse_NE,
)

# default URL
BASE_URL = "https://aeronet.gsfc.nasa.gov/data_push/V3/All_Sites_Times_Daily_Averages_SDA20.zip"
BASE_URL_TAR = (
"https://aeronet.gsfc.nasa.gov/data_push/V3/SDA/SDA_Level20_Daily_V3.tar.gz"
)
# number of lines to read before the reading is handed to Pythobn's csv reader
HEADER_LINE_NO = 7
DELIMITER = ","
#
NAN_VAL = -999.0
# update progress bar every N lines...
PG_UPDATE_LINES = 100
# main variables to store
LAT_NAME = "Site_Latitude(Degrees)"
LON_NAME = "Site_Longitude(Degrees)"
ALT_NAME = "Site_Elevation(m)"
SITE_NAME = "AERONET_Site_Name"
DATE_NAME = "Date_(dd:mm:yyyy)"
TIME_NAME: str = "Time_(hh:mm:ss)"
AOD500_NAME = "Total_AOD_500nm[tau_a]"
AOD500GT1_NAME = "Coarse_Mode_AOD_500nm[tau_c]"
AOD500LT1_NAME = "Fine_Mode_AOD_500nm[tau_f]"
ANG50_NAME = "Angstrom_Exponent(AE)-Total_500nm[alpha]"
ETA50LT1_NAME = "FineModeFraction_500nm[eta]"
AOD550GT1_NAME = "AODGT1_550nm"
AOD550LT1_NAME = "AODLT1_550nm"
AOD550_NAME = "AOD_550nm"

DATA_VARS = [AOD500_NAME, AOD500GT1_NAME, AOD500LT1_NAME, ANG50_NAME, ETA50LT1_NAME]
COMPUTED_VARS = [AOD550GT1_NAME, AOD550LT1_NAME, AOD550_NAME]
# The computed variables have to be named after the read ones, otherwise the calculation will fail!
DATA_VARS.extend(COMPUTED_VARS)

FILL_COUNTRY_FLAG = False

FILE_MASK = "*.ONEILL_lev*"


class AeronetSdaTimeseriesReader(AutoFilterReaderEngine.AutoFilterReader):
def __init__(
self,
filename,
filters=[],
fill_country_flag: bool = FILL_COUNTRY_FLAG,
tqdm_desc: [str, None] = None,
):
"""open a new csv timeseries-reader
:param filename: str
:param filters:
:param fill_country_flag:
:param tqdm_desc:
:param filename_or_obj_or_url: path-like object to csv-file
input file looks like this:
Version 3: SDA Retrieval Level 2.0
The following data are automatically cloud cleared and quality assured with pre-field and post-field calibration applied.
Contact: PI=Pawan Gupta and Elena Lind; PI [email protected] and [email protected]
Daily Averages,UNITS can be found at,,, https://aeronet.gsfc.nasa.gov/new_web/units.html
AERONET_Site,Date_(dd:mm:yyyy),Time_(hh:mm:ss),Day_of_Year,Total_AOD_500nm[tau_a],Fine_Mode_AOD_500nm[tau_f],Coarse_Mode_AOD_500nm[tau_c],FineModeFraction_500nm[eta],2nd_Order_Reg_Fit_Error-Total_AOD_500nm[regression_dtau_a],RMSE_Fine_Mode_AOD_500nm[Dtau_f],RMSE_Coarse_Mode_AOD_500nm[Dtau_c],RMSE_FineModeFraction_500nm[Deta],Angstrom_Exponent(AE)-Total_500nm[alpha],dAE/dln(wavelength)-Total_500nm[alphap],AE-Fine_Mode_500nm[alpha_f],dAE/dln(wavelength)-Fine_Mode_500nm[alphap_f],N[Total_AOD_500nm[tau_a]],N[Fine_Mode_AOD_500nm[tau_f]],N[Coarse_Mode_AOD_500nm[tau_c]],N[FineModeFraction_500nm[eta]],N[2nd_Order_Reg_Fit_Error-Total_AOD_500nm[regression_dtau_a]],N[RMSE_Fine_Mode_AOD_500nm[Dtau_f]],N[RMSE_Coarse_Mode_AOD_500nm[Dtau_c]],N[RMSE_FineModeFraction_500nm[Deta]],N[Angstrom_Exponent(AE)-Total_500nm[alpha]],N[dAE/dln(wavelength)-Total_500nm[alphap]],N[AE-Fine_Mode_500nm[alpha_f]],N[dAE/dln(wavelength)-Fine_Mode_500nm[alphap_f]],Data_Quality_Level,AERONET_Instrument_Number,AERONET_Site_Name,Site_Latitude(Degrees),Site_Longitude(Degrees),Site_Elevation(m),
Cuiaba,16:06:1993,12:00:00,167,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0,0,0,0,0,0,0,0,0,0,0,0,lev20,3,Cuiaba,-15.555244,-56.070214,234.000000
Cuiaba,17:06:1993,12:00:00,168,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0,0,0,0,0,0,0,0,0,0,0,0,lev20,3,Cuiaba,-15.555244,-56.070214,234.000000
Cuiaba,19:06:1993,12:00:00,170,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,-999.,0,0,0,0,0,0,0,0,0,0,0,0,lev20,3,Cuiaba,-15.555244,-56.070214,234.000000
"""
self._filename = filename
self._stations = {}
self._data = {} # var -> {data-array}
self._filters = filters
self._header = []
_laststatstr = ""

# check if file is a URL
if self.is_valid_url(self._filename):
# try to open as zipfile
try:
r = requests.get(self._filename)
zip_ref = ZipFile(BytesIO(r.content))
for file in zip_ref.namelist():
with zip_ref.open(file) as response:
lines = [line.decode("utf-8") for line in response.readlines()]
# read only 1st file here
break
except BadZipFile:
# try reading as tar.gz file
# Aeronet's tar files differ from the zip files by providing one file per station instead of one file
# with all stations
# the general format of the data is the same though.
# so we just keep the header lines of the 1st station, and add all data lines of all stations
# That way we get to the same file format as the zip file
r.close()
try:
r = requests.get(self._filename)
with tarfile.open(fileobj=BytesIO(r.content), mode="r") as tf:
lines = []
_fidx = 0
members = tf.getmembers()
bar = tqdm(desc="extracting tar file...", total=len(members))
for _midx, member in enumerate(members):
if fnmatch(member.name, FILE_MASK):
bar.update(1)
f = tf.extractfile(member)
if _fidx == 0:
lines.extend(
[line.decode("utf-8") for line in f.readlines()]
)
_fidx += 1
else:
# skip the header lines
for _hidx in range(HEADER_LINE_NO):
dummy = f.readline()
lines.extend(
[line.decode("utf-8") for line in f.readlines()]
)
else:
continue

# too many possible exceptions due to different tar possible tar file
# compressions. Just try to read as text if everything fails
except:
# read as text file
r.close()
try:
response = urlopen(self._filename)
lines = [line.decode("utf-8") for line in response.readlines()]
except Exception as e:
print(e)

else:
with open(self._filename, newline="") as csvfile:
lines = csvfile.readlines()

for _hidx in range(HEADER_LINE_NO - 1):
self._header.append(lines.pop(0))
# get fields from header line although csv can do that as well since we might want to adjust these names
self._fields = lines.pop(0).strip().split(",")

gcd = Geocoder_Reverse_NE()
crd = csv.DictReader(lines, fieldnames=self._fields, delimiter=DELIMITER)
bar = tqdm(desc=tqdm_desc, total=len(lines))
for _ridx, row in enumerate(crd):
bar.update(1)
if row[SITE_NAME] != _laststatstr:
_laststatstr = row[SITE_NAME]
# new station
station = row[SITE_NAME]
lon = float(row[LON_NAME])
lat = float(row[LAT_NAME])
alt = float(row["Site_Elevation(m)"])
if fill_country_flag:
try:
country = gcd.lookup(lat, lon)["ISO_A2_EH"]
except Geocoder_Reverse_Exception:
country = "NN"
else:
country = "NN"

# units of Aeronet data are always 1
units = "1"
if not station in self._stations:
self._stations[station] = Station(
{
"station": station,
"longitude": lon,
"latitude": lat,
"altitude": alt,
"country": country,
"url": "",
"long_name": station,
}
)
# every line contains all variables, sometimes filled with NaNs though
if _ridx == 0:
for variable in DATA_VARS:
if variable in self._data:
da = self._data[variable]
if da.units != units:
raise Exception(
f"unit change from '{da.units}' to 'units'"
)
else:
da = NpStructuredData(variable, units)
self._data[variable] = da

day, month, year = row[DATE_NAME].split(":")
datestring = "-".join([year, month, day])
datestring = "T".join([datestring, row[TIME_NAME]])
start = np.datetime64(datestring)
end = start

ts_dummy_data = {}
for variable in DATA_VARS:
try:
value = float(row[variable])
if value == NAN_VAL:
value = np.nan
# store value in ts_dummy_data, so we don't need to perform the nan check
# for each component of calculated values again
ts_dummy_data[variable] = value
except KeyError:
# computed variable
if variable == AOD550GT1_NAME:
value = self.compute_od_from_angstromexp(
0.55,
ts_dummy_data[AOD500GT1_NAME],
0.50,
ts_dummy_data[ANG50_NAME],
)
elif variable == AOD550LT1_NAME:
value = self.compute_od_from_angstromexp(
0.55,
ts_dummy_data[AOD500LT1_NAME],
0.50,
ts_dummy_data[ANG50_NAME],
)
elif variable == AOD500_NAME:
value = self.compute_od_from_angstromexp(
0.55,
ts_dummy_data[AOD500_NAME],
0.50,
ts_dummy_data[ANG50_NAME],
)
self._data[variable].append(
value, station, lat, lon, alt, start, end, Flag.VALID, np.nan
)
bar.close()

def _unfiltered_data(self, varname) -> Data:
return self._data[varname]

def _unfiltered_stations(self) -> dict[str, Station]:
return self._stations

def _unfiltered_variables(self) -> list[str]:
return list(self._data.keys())

def close(self):
pass

def compute_od_from_angstromexp(
self, to_lambda: float, od_ref: float, lambda_ref: float, angstrom_coeff: float
) -> float:
"""Compute AOD at specified wavelength
Uses Angstrom coefficient and reference AOD to compute the
corresponding wavelength shifted AOD
Parameters
----------
to_lambda : :obj:`float` or :obj:`ndarray`
wavelength for which AOD is calculated
od_ref : :obj:`float` or :obj:`ndarray`
reference AOD
lambda_ref : :obj:`float` or :obj:`ndarray`
wavelength corresponding to reference AOD
angstrom_coeff : :obj:`float` or :obj:`ndarray`
Angstrom coefficient
Returns
-------
:obj:`float` or :obj:`ndarray`
AOD(s) at shifted wavelength
"""
return od_ref * (lambda_ref / to_lambda) ** angstrom_coeff

def is_valid_url(self, url):
try:
result = urlparse(url)
return all([result.scheme, result.netloc])
except ValueError:
return False


class AeronetSdaTimeseriesEngine(AutoFilterReaderEngine.AutoFilterEngine):
def reader_class(self):
return AeronetSdaTimeseriesReader

def open(self, filename, *args, **kwargs) -> AeronetSdaTimeseriesReader:
return self.reader_class()(filename, *args, **kwargs)

def description(self):
return "Simple reader of AeronetSDA-files using the pyaro infrastructure"

def url(self):
return "https://github.com/metno/pyaro-readers"
4 changes: 4 additions & 0 deletions src/pyaro_readers/aeronetsdareader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .AeronetSdaTimeseriesReader import (
AeronetSdaTimeseriesEngine,
AeronetSdaTimeseriesReader,
)
Loading

0 comments on commit 3c721ed

Please sign in to comment.