Skip to content

Commit

Permalink
WIP: make all readers importable with separate read() method
Browse files Browse the repository at this point in the history
Jan Griesfeller committed Oct 31, 2024
1 parent 59c4f7f commit 5f243e6
Showing 7 changed files with 63 additions and 22 deletions.
21 changes: 15 additions & 6 deletions src/pyaro_readers/eeareader/EEATimeseriesReader.py
Original file line number Diff line number Diff line change
@@ -72,9 +72,15 @@ def __init__(
self._data = {} # var -> {data-array}
self._set_filters(filters)

self.metadata = self._read_metadata(filename)
self._metadata = self._read_metadata(filename)
self._filters = filters

self._read_polars(filters, filename)
def read(self):
"""reading method quick and dirty"""
self._read_polars(self._filters, self._filename)

def metadata(self) -> dict[str, str]:
return self._metadata

def _read_polars(self, filters, filename) -> None:
try:
@@ -163,7 +169,7 @@ def _read_polars(self, filters, filename) -> None:
continue
df = lf

station_metadata = self.metadata[df.row(0)[0].split("/")[-1]]
station_metadata = self._metadata[df.row(0)[0].split("/")[-1]]

file_unit = df.row(0)[df.get_column_index("Unit")]

@@ -223,7 +229,7 @@ def _filter_dates(
)

def _read_metadata(self, folder: str) -> dict:
metadata = {}
_metadata = {}
filename = Path(folder) / "metadata.csv"
if not filename.exists():
raise FileExistsError(f"Metadata file could not be found in {folder}")
@@ -237,15 +243,15 @@ def _read_metadata(self, folder: str) -> dict:
alt = float(words[5])
except:
continue
metadata[words[0]] = {
_metadata[words[0]] = {
"lon": lon,
"lat": lat,
"alt": alt,
"stationcode": words[2],
"country": words[1],
}

return metadata
return _metadata

def _unfiltered_data(self, varname) -> Data:
return self._data[varname]
@@ -272,3 +278,6 @@ def description(self):

def url(self):
return "https://github.com/metno/pyaro-readers"

def read(self):
return self.reader_class().read()
8 changes: 8 additions & 0 deletions src/pyaro_readers/netcdf_rw/Netcdf_RWTimeseries.py
Original file line number Diff line number Diff line change
@@ -55,6 +55,9 @@ def __init__(
else:
os.path.makedirs(filename)

def read(self):
"""standard read method"""

dataglob = os.path.join(self._directory, f"{self.ncfile_prefix}.????.nc")
self._years = set()
for file in glob.iglob(dataglob):
@@ -76,6 +79,7 @@ def iterate_files(self):
if os.path.exists(file_path):
yield file_path


def metadata(self):
metadata = dict()
date = datetime.datetime.min
@@ -371,3 +375,7 @@ def description(self) -> str:

def url(self):
return "https://github.com/metno/pyaro-readers"

def read(self):
return self.reader_class().read()

Original file line number Diff line number Diff line change
@@ -73,25 +73,31 @@ def __init__(
self._set_filters(filters)
self._header = []
self._revision = datetime.datetime.min
self._filename = filename
self._fill_country_flag = fill_country_flag
self._file_mask = file_mask
self._tqdm_desc = tqdm_desc

if Path(filename).is_file():
self._filename = filename
self._process_file(self._filename, fill_country_flag)
def read(self):
""" read method"""

elif Path(filename).is_dir():
files_pathlib = Path(filename).glob(file_mask)
if Path(self._filename).is_file():
self._process_file(self._filename, self._fill_country_flag)

elif Path(self._filename).is_dir():
files_pathlib = Path(self._filename).glob(self._file_mask)
files = [x for x in files_pathlib if x.is_file()]

if len(files) == 0:
raise ValueError(
f"Could not find any nas files in given folder {self._filename}"
)
bar = tqdm(desc=tqdm_desc, total=len(files))
bar = tqdm(desc=self._tqdm_desc, total=len(files), disable=None)
for file in files:
bar.update(1)
self._process_file(file, fill_country_flag)
self._process_file(file, self._fill_country_flag)
else:
raise ValueError(f"Given filename {filename} is neither a folder or a file")
raise ValueError(f"Given filename {self._filename} is neither a folder or a file")

def _process_file(self, file: Path, fill_country_flag: bool = FILL_COUNTRY_FLAG):
with open(file, newline="") as f:
@@ -231,3 +237,6 @@ def description(self):

def url(self):
return "https://github.com/metno/pyaro-readers"

def read(self):
return self.reader_class().read()
26 changes: 18 additions & 8 deletions src/pyaro_readers/nilupmfebas/EbasPmfReader.py
Original file line number Diff line number Diff line change
@@ -45,6 +45,10 @@ def __init__(
self._variables = {}
self._metadata = {}
self._revision = datetime.datetime.min
self._filename = filename
self._filemask = filemask
self._vars_to_read = vars_to_read
self._tqdm_desc = tqdm_desc

# variable include filter comes like this
# {'variables': {'include': ['PM10_density']}}
@@ -55,26 +59,29 @@ def __init__(
self._vars_to_read = vars_to_read
logger.info(f"applying variable include filter {vars_to_read}...")

realpath = Path(filename).resolve()
self._realpath = Path(filename).resolve()

if Path(realpath).is_dir():
def read(self):
"""read method"""

if Path(self._realpath).is_dir():
# search directory for files
files = list(realpath.glob(filemask))
bar = tqdm(desc=tqdm_desc, total=len(files))
files = list(self._realpath.glob(self._filemask))
bar = tqdm(desc=self._tqdm_desc, total=len(files), disable=None)

for _ridx, file in enumerate(files):
bar.update(1)
logger.info(file)
self.read_file(file, vars_to_read=vars_to_read)
self.read_file(file, vars_to_read=self._vars_to_read)
if _ridx > 30:
assert True

bar.close()
elif Path(realpath).is_file():
self.read_file(realpath)
elif Path(self._realpath).is_file():
self.read_file(self._realpath)
else:
# filename is something else
raise EBASPMFReaderException(f"No such file or directory: {filename}")
raise EBASPMFReaderException(f"No such file or directory: {self._filename}")

def metadata(self):
metadata = dict()
@@ -266,6 +273,9 @@ def description(self):
def url(self):
return "https://github.com/metno/pyaro-readers"

def read(self):
return self.reader_class().read()


class ReadEbasOptions(dict):
"""Options for EBAS reading routine
1 change: 1 addition & 0 deletions tests/test_EEAReader.py
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@ def test_1open_files(self):
self.testdata_dir,
filters={"variables": {"include": ["PM10", "SO2"]}},
) as ts:
ts.read()
self.assertGreaterEqual(len(ts.variables()), 2)
self.assertGreaterEqual(len(ts.stations()), 2)
for var in ts.variables():
2 changes: 2 additions & 0 deletions tests/test_EbasPmfTimeSeriesReader.py
Original file line number Diff line number Diff line change
@@ -54,6 +54,7 @@ def test_0engine(self):

def test_1open_single_file(self):
with pyaro.open_timeseries(self.engine, self.file, filters=[]) as ts:
ts.read()
self.assertGreaterEqual(len(ts.variables()), 1)
for var in ts.variables():
assert var in self.test_vars
@@ -63,6 +64,7 @@ def test_1open_single_file(self):

def test_2open_directory(self):
with pyaro.open_timeseries(self.engine, self.testdata_dir, filters=[]) as ts:
ts.read()
self.assertGreaterEqual(len(ts.variables()), 1)
for var in ts.variables():
assert var in self.test_vars
2 changes: 2 additions & 0 deletions tests/test_NILUPMFAbsorptionReader.py
Original file line number Diff line number Diff line change
@@ -26,6 +26,7 @@ def test_0engine(self):

def test_1open_single_file(self):
with pyaro.open_timeseries(self.engine, self.file, filters=[]) as ts:
ts.read()
self.assertGreaterEqual(len(ts.variables()), 1)
for var in ts.variables():
assert var in self.test_vars
@@ -36,6 +37,7 @@ def test_1open_single_file(self):

def test_2open_directory(self):
with pyaro.open_timeseries(self.engine, self.testdata_dir, filters=[]) as ts:
ts.read()
self.assertGreaterEqual(len(ts.variables()), 1)
for var in ts.variables():
assert var in self.test_vars

0 comments on commit 5f243e6

Please sign in to comment.