Skip to content

Commit

Permalink
Merge pull request #126 from VForWaTer/array_type_datatable
Browse files Browse the repository at this point in the history
Array type datatable
  • Loading branch information
mmaelicke authored May 28, 2021
2 parents 64aedd0 + 58962cc commit 993f04d
Show file tree
Hide file tree
Showing 21 changed files with 1,038 additions and 453 deletions.
8 changes: 6 additions & 2 deletions metacatalog/api/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def add_unit(session, name, symbol, si=None):
return add_record(session=session, tablename='units', **attrs)


def add_variable(session, name, symbol, unit):
def add_variable(session, name, symbol, column_names, unit):
r"""Add variable record
Add a new variable to the database.
Expand All @@ -130,6 +130,10 @@ def add_variable(session, name, symbol, unit):
symbol : str
The variable symbol. Try to use the correct
physical variable symbols and avoid duplicates.
column_names : list
.. versionadded:: 0.3.0
List of default column names that will be displayed when exporting the data.
The columns are named in the same order as they appear in the list.
unit : int, str
Either the id or **full** name of the unit to be
linked to this variable.
Expand All @@ -141,7 +145,7 @@ def add_variable(session, name, symbol, unit):
"""
#create the attribute dict
attrs = dict(name=name, symbol=symbol)
attrs = dict(name=name, symbol=symbol, column_names=column_names)

# get the unit
if isinstance(unit, int):
Expand Down
13 changes: 10 additions & 3 deletions metacatalog/api/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def _remove_nan_from_dict(d):
return out_d


def import_table_data(fname, InstanceClass):
def import_table_data(fname, InstanceClass, array_col_name=None):
try:
df = pd.read_csv(os.path.join(DATAPATH, fname))
except ParserError as e:
Expand All @@ -131,6 +131,10 @@ def import_table_data(fname, InstanceClass):
# replace nan with None
df = df.where(df.notnull(), None)

# handle arrays
if array_col_name is not None:
df[array_col_name] = [[cell] for cell in df[array_col_name].values]

# build an instance for each line and return
return [InstanceClass(**_remove_nan_from_dict(d)) for d in df.to_dict(orient='record')]

Expand Down Expand Up @@ -226,8 +230,11 @@ def populate_defaults(session, ignore_tables=[], bump_sequences=10000):
print('Finished %s' % table)
continue

# get the classes
instances = import_table_data('%s.csv' % table, InstanceClass)
elif table == 'variables':
instances = import_table_data('variables.csv', InstanceClass, array_col_name='column_names')
else:
# get the classes
instances = import_table_data('%s.csv' % table, InstanceClass)

# add
try:
Expand Down
2 changes: 1 addition & 1 deletion metacatalog/data/datatypes.csv
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ id,parent_id,name,title,description
20,16,idataframe,indexed table,"NDArray with any index except datetime information."
21,20,vdataframe,"named, indexed table","idataframe with additional name property of any valid metacatalog Variable."
22,16,time-dataframe,timeseries table,"NDArray indexed by datetime information. The datetimes need to be of increasing order."
23,22,vtime-dataframe,named timeseries table,"Timeseries table that holds an additional Variable name to describe the content."
23,22,vtime-dataframe,named timeseries table,"Timeseries table that holds an additional Variable name to describe the content."
1 change: 1 addition & 0 deletions metacatalog/data/entrygroup_types.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ id,name,description
1,Project,"A Project groups datasets into a larger collection of datasets that have been collected or used in the same Campaign."
2,Composite,"A composite dataset groups a number of datasets that are inseparable."
3,Split dataset,"A split dataset groups a number of identical datasets that have to be split e.g. in case of different time scale resolution."
4,Label,"A Label groups different datasets into a larger collection of datasets, that are not a composite, but i.e. collected at the same site."
40 changes: 21 additions & 19 deletions metacatalog/data/variables.csv
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
id,name,symbol,unit_id,keyword_id
1,air temperature,Ta,101,111
2,soil temperature,Ts,101,5736
3,water temperature,Tw,101,7402
4,discharge,Q,108,7327
5,air pressure,p,104,109
6,relative humidity,RH,112,6308
7,daily rainfall sum,P,103,6434
8,rainfall intensity,Pi,105,6436
9,solar irradiance,SI,115,5236
10,net radiation,Rn,115,5227
11,gravimetric water content,u,114,5727
12,volumetric water content,theta,113,5727
13,precision,sigma,21,
14,sap flow,Fm,22,7424
15,matric potential,phi,24,
16,bulk electrical conductivity,bEC,25,5111
17,specific electrical conductivity,sEC,25,5111
18,river water level,L,2,
id,name,symbol,column_names,unit_id,keyword_id
1,air temperature,Ta,air_temperature,101,111
2,soil temperature,Ts,soil_temperature,101,5736
3,water temperature,Tw,water_temperature,101,7402
4,discharge,Q,discharge,108,7327
5,air pressure,p,air_pressure,104,109
6,relative humidity,RH,relative_humidity,112,6308
7,daily rainfall sum,P,daily_rainfall_sum,103,6434
8,rainfall intensity,Pi,rainfall_intensity,105,6436
9,solar irradiance,SI,solar_irradiance,115,5236
10,net radiation,Rn,net_radiation,115,5227
11,gravimetric water content,u,gravimetric_water_content,114,5727
12,volumetric water content,theta,volumetric_water_content,113,5727
13,precision,sigma,precision,21,
14,sap flow,Fm,sap_flow,22,7424
15,matric potential,phi,matric_potential,24,
16,bulk electrical conductivity,bEC,bulk_electrical_conductivity,25,5111
17,specific electrical conductivity,sEC,specific_electrical_conductivity,25,5111
18,river water level,L,river_water_level,2,
19,evapotranspiration,ET,evapotranspiration,103,6319
20,drainage,D,drainage,103,7328
2 changes: 2 additions & 0 deletions metacatalog/db/revisions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
rev3,
rev4,
rev5,
rev6,
)

revisions = {
Expand All @@ -14,4 +15,5 @@
3: rev3,
4: rev4,
5: rev5,
6: rev6,
}
119 changes: 119 additions & 0 deletions metacatalog/db/revisions/rev6.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""
Metacatalog database revision
-----------------------------
date: 2021-05-20T11:02:13.319954
revision #6
"""
from sqlalchemy.orm import Session
from metacatalog import api, models


# SQL script executed by ``upgrade`` for revision #6. In order, it:
#   * inserts the 'Label' entrygroup type (id 4),
#   * adds the array columns variables.column_names and
#     datasources.data_names,
#   * inserts variables 19 and 20 (on a primary-key conflict only
#     column_names is updated) and sets column_names for variables 1-18,
#   * copies each variable's column_names into data_names of the datasource
#     referenced by its entries,
#   * renames the existing timeseries table (and its constraints) to
#     timeseries_1d and repoints datasources.path accordingly,
#   * creates a new timeseries table with REAL[] "data"/"precision" columns,
#   * widens entrygroups.title to 250 characters.
# The script ends with its own COMMIT.
UPGRADE_SQL = """
-- add a new Entrygroup type
INSERT INTO entrygroup_types (id, name, description) VALUES
(4,'Label','A Label groups different datasets into a larger collection of datasets, that are not a composite, but i.e. collected at the same site.');
-- todo, here the new column creation is missing
ALTER TABLE variables ADD COLUMN column_names CHARACTER VARYING(128)[];
ALTER TABLE datasources ADD COLUMN data_names CHARACTER VARYING(128)[];
-- add new variables
INSERT INTO variables (id,name,symbol,column_names,unit_id,keyword_id) VALUES
(19,'evapotranspiration','ET','{"evapotranspiration"}',103,6319),
(20,'drainage','D','{"drainage"}',103,7328)
ON CONFLICT ON CONSTRAINT variables_pkey
DO
UPDATE SET column_names=EXCLUDED.column_names;
-- add column names
UPDATE variables set column_names='{"air_temperature"}' WHERE id=1;
UPDATE variables set column_names='{"soil_temperature"}' WHERE id=2;
UPDATE variables set column_names='{"water_temperature"}' WHERE id=3;
UPDATE variables set column_names='{"discharge"}' WHERE id=4;
UPDATE variables set column_names='{"air_pressure"}' WHERE id=5;
UPDATE variables set column_names='{"relative_humidity"}' WHERE id=6;
UPDATE variables set column_names='{"daily_rainfall_sum"}' WHERE id=7;
UPDATE variables set column_names='{"rainfall_intensity"}' WHERE id=8;
UPDATE variables set column_names='{"solar_irradiance"}' WHERE id=9;
UPDATE variables set column_names='{"net_radiation"}' WHERE id=10;
UPDATE variables set column_names='{"gravimetric_water_content"}' WHERE id=11;
UPDATE variables set column_names='{"volumetric_water_content"}' WHERE id=12;
UPDATE variables set column_names='{"precision"}' WHERE id=13;
UPDATE variables set column_names='{"sap_flow"}' WHERE id=14;
UPDATE variables set column_names='{"matric_potential"}' WHERE id=15;
UPDATE variables set column_names='{"bulk_electrical_conductivity"}' WHERE id=16;
UPDATE variables set column_names='{"specific_electrical_conductivity"}' WHERE id=17;
UPDATE variables set column_names='{"river_water_level"}' WHERE id=18;
-- column names are build therefore the data_names can be filled
UPDATE datasources SET data_names=column_names
FROM entries JOIN variables ON entries.variable_id=variables.id
WHERE datasources.id = entries.datasource_id;
-- rename timeseries to timeseries_1d
ALTER TABLE timeseries RENAME TO timeseries_1d;
ALTER TABLE timeseries_1d RENAME CONSTRAINT timeseries_pkey TO timeseries_1d_pkey;
ALTER TABLE timeseries_1d RENAME CONSTRAINT timeseries_entry_id_fkey TO timeseries_1d_entry_id_fkey;
-- update datasources
UPDATE datasources SET path='timeseries_1d' WHERE path='timeseries';
-- create new table
CREATE TABLE timeseries (
entry_id INTEGER NOT NULL,
tstamp timestamp without time zone NOT NULL,
"data" REAL[],
"precision" REAL[]
);
ALTER TABLE timeseries ADD CONSTRAINT timeseries_pkey PRIMARY KEY (entry_id, tstamp);
ALTER TABLE timeseries ADD CONSTRAINT timeseries_entry_id_fkey FOREIGN KEY (entry_id) REFERENCES entries (id);
-- make entrygroup titles longer
ALTER TABLE entrygroups ALTER COLUMN title TYPE character varying(250);
COMMIT;
"""

# SQL script executed by ``downgrade`` to revert revision #6. It deletes
# entrygroups of type 4 (and their nm_entrygroups rows) and the type itself,
# drops variables.column_names and datasources.data_names, drops the
# timeseries table, renames timeseries_1d (and its constraints) back to
# timeseries, repoints datasources.path and sets entrygroups.title back to
# 40 characters.
# NOTE(review): the script issues COMMIT twice -- once right after the
# DROP TABLE and once at the end -- so the two halves are committed
# separately; confirm a failure in the second half leaving the first half
# applied is acceptable.
# NOTE(review): variables 19 and 20 inserted by UPGRADE_SQL are not removed
# here -- confirm this is intended.
# NOTE(review): presumably shrinking title to 40 characters fails if longer
# titles were stored after the upgrade -- verify against the database.
DOWNGRADE_SQL = """
-- delete entrygroups that use the Label type
DELETE FROM nm_entrygroups WHERE group_id in (SELECT id FROM entrygroups WHERE type_id=4);
DELETE FROM entrygroups WHERE type_id=4;
-- remove the entrygroup type
DELETE FROM entrygroup_types WHERE id=4;
-- remove the colmap column
ALTER TABLE variables DROP COLUMN column_names;
ALTER TABLE datasources DROP COLUMN data_names;
-- delete timeseries
DROP TABLE timeseries;
COMMIT;
-- rename the stuff back
ALTER TABLE timeseries_1d RENAME TO timeseries;
ALTER TABLE timeseries RENAME CONSTRAINT timeseries_1d_pkey TO timeseries_pkey;
ALTER TABLE timeseries RENAME CONSTRAINT timeseries_1d_entry_id_fkey TO timeseries_entry_id_fkey;
-- update datasources
UPDATE datasources SET path='timeseries' WHERE path='timeseries_1d';
-- change entrygroup title back
ALTER TABLE entrygroups ALTER COLUMN title TYPE character varying(40);
COMMIT;
"""

# define the upgrade function
def upgrade(session: Session):
    """Apply revision #6 by running ``UPGRADE_SQL``.

    Parameters
    ----------
    session : sqlalchemy.orm.Session
        Session whose bound engine is used to open the connection.

    Notes
    -----
    The whole script is sent in a single ``execute`` call on a connection
    obtained from ``session.bind``; the connection is closed when the
    ``with`` block exits.
    """
    engine = session.bind
    with engine.connect() as connection:
        connection.execute(UPGRADE_SQL)


# define the downgrade function
def downgrade(session: Session):
    """Revert revision #6 by running ``DOWNGRADE_SQL``.

    Parameters
    ----------
    session : sqlalchemy.orm.Session
        Session whose bound engine is used to open the connection.

    Notes
    -----
    The whole script is sent in a single ``execute`` call on a connection
    obtained from ``session.bind``; the connection is closed when the
    ``with`` block exits.
    """
    engine = session.bind
    with engine.connect() as connection:
        connection.execute(DOWNGRADE_SQL)
Loading

0 comments on commit 993f04d

Please sign in to comment.