Merge pull request #126 from VForWaTer/array_type_datatable

Array type datatable
VForWaTer · May 28, 2021 · 993f04d · 993f04d
2 parents 64aedd0 + 58962cc
commit 993f04d
Show file tree

Hide file tree

Showing 21 changed files with 1,038 additions and 453 deletions.
diff --git a/metacatalog/api/add.py b/metacatalog/api/add.py
@@ -116,7 +116,7 @@ def add_unit(session, name, symbol, si=None):
     return add_record(session=session, tablename='units', **attrs)
 
 
-def add_variable(session, name, symbol, unit):
+def add_variable(session, name, symbol, column_names, unit):
     r"""Add variable record
 
     Add a new variable to the database.
@@ -130,6 +130,10 @@ def add_variable(session, name, symbol, unit):
     symbol : str
         The variable symbol. Try to use the correct
         physical variable symbols and avoid duplicates.
+    column_names : list
+        .. versionadded:: 0.3.0
+        List of default column names that will be displayed when exporting the data.
+        The columns are named in the same order as they appear in the list.
     unit : int, str
         Either the id or **full** name of the unit to be
         linked to this variable.
@@ -141,7 +145,7 @@ def add_variable(session, name, symbol, unit):
 
     """
     #create the attribute dict
-    attrs = dict(name=name, symbol=symbol)
+    attrs = dict(name=name, symbol=symbol, column_names=column_names)
 
     # get the unit
     if isinstance(unit, int):

diff --git a/metacatalog/api/db.py b/metacatalog/api/db.py
@@ -121,7 +121,7 @@ def _remove_nan_from_dict(d):
     return out_d
 
 
-def import_table_data(fname, InstanceClass):
+def import_table_data(fname, InstanceClass, array_col_name=None):
     try:
         df = pd.read_csv(os.path.join(DATAPATH, fname))
     except ParserError as e:
@@ -131,6 +131,10 @@ def import_table_data(fname, InstanceClass):
     # replace nan with None
     df = df.where(df.notnull(), None)
 
+    # wrap every cell of the array column in a one-element list (each CSV cell holds a single value)
+    if array_col_name is not None:
+        df[array_col_name] = [[cell] for cell in df[array_col_name].values]
+
     # build an instance for each line and return
     return [InstanceClass(**_remove_nan_from_dict(d)) for d in df.to_dict(orient='record')]
 
@@ -226,8 +230,11 @@ def populate_defaults(session, ignore_tables=[], bump_sequences=10000):
             print('Finished %s' % table)
             continue
 
-        # get the classes
-        instances = import_table_data('%s.csv' % table, InstanceClass)
+        elif table == 'variables':
+            instances = import_table_data('variables.csv', InstanceClass, array_col_name='column_names')
+        else:
+            # get the classes
+            instances = import_table_data('%s.csv' % table, InstanceClass)
 
         # add
         try:

diff --git a/metacatalog/data/datatypes.csv b/metacatalog/data/datatypes.csv
@@ -12,4 +12,4 @@ id,parent_id,name,title,description
 20,16,idataframe,indexed table,"NDArray with any index except datetime information."
 21,20,vdataframe,"named, indexed table","idataframe with additional name property of any valid metacatalog Variable."
 22,16,time-dataframe,timeseries table,"NDArray indexed by datetime information. The datetimes need to be of increasing order."
-23,22,vtime-dataframe,named timeseries table,"Timeseries table that holds an additional Variable name to describe the content."
+23,22,vtime-dataframe,named timeseries table,"Timeseries table that holds an additional Variable name to describe the content."
diff --git a/metacatalog/data/entrygroup_types.csv b/metacatalog/data/entrygroup_types.csv
@@ -2,3 +2,4 @@ id,name,description
 1,Project,"A Project groups datasets into a larger collection of datasets that have been collected or used in the same Campaign."
 2,Composite,"A composite dataset groups a number of datasets that are inseparable."
 3,Split dataset,"A split dataset groups a number of identical datasets that have to be split e.g. in case of different time scale resolution."
+4,Label,"A Label groups different datasets into a larger collection of datasets, that are now a composite, but i.e. collected at the same site."
diff --git a/metacatalog/data/variables.csv b/metacatalog/data/variables.csv
@@ -1,19 +1,21 @@
-id,name,symbol,unit_id,keyword_id
-1,air temperature,Ta,101,111
-2,soil temperature,Ts,101,5736
-3,water temperature,Tw,101,7402
-4,discharge,Q,108,7327
-5,air pressure,p,104,109
-6,relative humidity,RH,112,6308
-7,daily rainfall sum,P,103,6434
-8,rainfall intensity,Pi,105,6436
-9,solar irradiance,SI,115,5236
-10,net radiation,Rn,115,5227
-11,gravimetric water content,u,114,5727
-12,volumetric water content,theta,113,5727
-13,precision,sigma,21,
-14,sap flow,Fm,22,7424
-15,matric potential,phi,24,
-16,bulk electrical conductivity,bEC,25,5111
-17,specific electrical conductivity,sEC,25,5111
-18,river water level,L,2,
+id,name,symbol,column_names,unit_id,keyword_id
+1,air temperature,Ta,air_temperature,101,111
+2,soil temperature,Ts,soil_temperature,101,5736
+3,water temperature,Tw,water_temperature,101,7402
+4,discharge,Q,discharge,108,7327
+5,air pressure,p,air_pressure,104,109
+6,relative humidity,RH,relative_humidity,112,6308
+7,daily rainfall sum,P,daily_rainfall_sum,103,6434
+8,rainfall intensity,Pi,rainfall_intensity,105,6436
+9,solar irradiance,SI,solar_irradiance,115,5236
+10,net radiation,Rn,net_radiation,115,5227
+11,gravimetric water content,u,gravimetric_water_content,114,5727
+12,volumetric water content,theta,volumetric_water_content,113,5727
+13,precision,sigma,precision,21,
+14,sap flow,Fm,sap_flow,22,7424
+15,matric potential,phi,matric_potential,24,
+16,bulk electrical conductivity,bEC,bulk_electrical_conductivity,25,5111
+17,specific electrical conductivity,sEC,specific_electrical_conductivity,25,5111
+18,river water level,L,river_water_level,2,
+19,evapotranspiration,ET,evapotranspiration,103,6319
+20,drainage,D,drainage,103,7328
diff --git a/metacatalog/db/revisions/__init__.py b/metacatalog/db/revisions/__init__.py
@@ -5,6 +5,7 @@
     rev3,
     rev4,
     rev5,
+    rev6,
 )
 
 revisions = {
@@ -14,4 +15,5 @@
     3: rev3,
     4: rev4,
     5: rev5,
+    6: rev6,
 }
diff --git a/metacatalog/db/revisions/rev6.py b/metacatalog/db/revisions/rev6.py
@@ -0,0 +1,119 @@
+"""
+Metacatalog database revision
+-----------------------------
+date: 2021-05-20T11:02:13.319954
+
+revision #6
+
+
+"""
+from sqlalchemy.orm import Session
+from metacatalog import api, models
+
+
+UPGRADE_SQL = """
+-- add a new Entrygroup type
+INSERT INTO entrygroup_types (id, name, description) VALUES 
+(4,'Label','A Label groups different datasets into a larger collection of datasets, that are now a composite, but i.e. collected at the same site.');
+
+-- add the array columns for default column names (variables) and data names (datasources)
+ALTER TABLE variables ADD COLUMN column_names CHARACTER VARYING(128)[];
+ALTER TABLE datasources ADD COLUMN data_names CHARACTER VARYING(128)[];
+
+-- add new variables
+INSERT INTO variables (id,name,symbol,column_names,unit_id,keyword_id) VALUES
+    (19,'evapotranspiration','ET','{"evapotranspiration"}',103,6319),
+    (20,'drainage','D','{"drainage"}',103,7328)
+ON CONFLICT ON CONSTRAINT variables_pkey 
+DO 
+    UPDATE SET column_names=EXCLUDED.column_names;
+
+-- add column names
+UPDATE variables set column_names='{"air_temperature"}' WHERE id=1;
+UPDATE variables set column_names='{"soil_temperature"}' WHERE id=2;
+UPDATE variables set column_names='{"water_temperature"}' WHERE id=3;
+UPDATE variables set column_names='{"discharge"}' WHERE id=4;
+UPDATE variables set column_names='{"air_pressure"}' WHERE id=5;
+UPDATE variables set column_names='{"relative_humidity"}' WHERE id=6;
+UPDATE variables set column_names='{"daily_rainfall_sum"}' WHERE id=7;
+UPDATE variables set column_names='{"rainfall_intensity"}' WHERE id=8;
+UPDATE variables set column_names='{"solar_irradiance"}' WHERE id=9;
+UPDATE variables set column_names='{"net_radiation"}' WHERE id=10;
+UPDATE variables set column_names='{"gravimetric_water_content"}' WHERE id=11;
+UPDATE variables set column_names='{"volumetric_water_content"}' WHERE id=12;
+UPDATE variables set column_names='{"precision"}' WHERE id=13;
+UPDATE variables set column_names='{"sap_flow"}' WHERE id=14;
+UPDATE variables set column_names='{"matric_potential"}' WHERE id=15;
+UPDATE variables set column_names='{"bulk_electrical_conductivity"}' WHERE id=16;
+UPDATE variables set column_names='{"specific_electrical_conductivity"}' WHERE id=17;
+UPDATE variables set column_names='{"river_water_level"}' WHERE id=18;
+
+-- the column names are built, therefore the data_names can be filled
+UPDATE datasources SET data_names=column_names
+FROM entries JOIN variables ON entries.variable_id=variables.id
+WHERE datasources.id = entries.datasource_id;
+
+-- rename timeseries to timeseries_1d
+ALTER TABLE timeseries RENAME TO timeseries_1d;
+ALTER TABLE timeseries_1d RENAME CONSTRAINT timeseries_pkey TO timeseries_1d_pkey;
+ALTER TABLE timeseries_1d RENAME CONSTRAINT timeseries_entry_id_fkey TO timeseries_1d_entry_id_fkey;
+
+-- update datasources
+UPDATE datasources SET path='timeseries_1d' WHERE path='timeseries';
+
+-- create new table
+CREATE TABLE timeseries (
+    entry_id INTEGER NOT NULL,
+    tstamp timestamp without time zone NOT NULL,
+    "data" REAL[],
+    "precision" REAL[]
+);
+ALTER TABLE timeseries ADD CONSTRAINT timeseries_pkey PRIMARY KEY (entry_id, tstamp);
+ALTER TABLE timeseries ADD CONSTRAINT timeseries_entry_id_fkey FOREIGN KEY (entry_id) REFERENCES entries (id);
+
+-- make entrygroup titles longer
+ALTER TABLE entrygroups ALTER COLUMN title TYPE character varying(250);
+
+COMMIT;
+"""
+
+DOWNGRADE_SQL = """
+-- delete entrygroups that use the Label type
+DELETE FROM nm_entrygroups WHERE group_id in (SELECT id FROM entrygroups WHERE type_id=4);
+DELETE FROM entrygroups WHERE type_id=4;
+
+-- remove the entrygroup type
+DELETE FROM entrygroup_types WHERE id=4;
+
+-- remove the column_names and data_names columns
+ALTER TABLE variables DROP COLUMN column_names;
+ALTER TABLE datasources DROP COLUMN data_names;
+
+-- delete timeseries
+DROP TABLE timeseries;
+COMMIT;
+
+-- rename the stuff back
+ALTER TABLE timeseries_1d RENAME TO timeseries;
+ALTER TABLE timeseries RENAME CONSTRAINT timeseries_1d_pkey TO timeseries_pkey;
+ALTER TABLE timeseries RENAME CONSTRAINT timeseries_1d_entry_id_fkey TO timeseries_entry_id_fkey;
+
+-- update datasources
+UPDATE datasources SET path='timeseries' WHERE path='timeseries_1d';
+
+-- change entrygroup title back
+ALTER TABLE entrygroups ALTER COLUMN title TYPE character varying(40);
+COMMIT;
+"""
+
+# define the upgrade function
+def upgrade(session: Session):
+    # execute the complete upgrade script (new EntryGroup type, array columns, timeseries tables)
+    with session.bind.connect() as con:
+        con.execute(UPGRADE_SQL)
+
+
+# define the downgrade function
+def downgrade(session: Session):
+    with session.bind.connect() as con:
+        con.execute(DOWNGRADE_SQL)
-Original file line number
+Diff line change
@@ Expand Up / @@ -5,6 +5,7 @@ @@
         rev3,
         rev4,
         rev5,
+        rev6,
     )
     revisions = {
@@ Expand All / @@ -14,4 +15,5 @@ @@
         3: rev3,
         4: rev4,
         5: rev5,
+        6: rev6,
     }