From 1879c826da43dfa03253970cc39f90383b2f0318 Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Mon, 3 Jul 2023 09:42:11 -0700 Subject: [PATCH 1/4] change defaults for beta --- src/dataregistry/db_basic.py | 2 +- src/dataregistry/registrar.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/dataregistry/db_basic.py b/src/dataregistry/db_basic.py index f7f3b2e0..42df483c 100644 --- a/src/dataregistry/db_basic.py +++ b/src/dataregistry/db_basic.py @@ -11,7 +11,7 @@ ''' Low-level utility routines and classes for accessing the registry ''' -SCHEMA_VERSION = 'registry_dev' +SCHEMA_VERSION = 'registry_beta' __all__ = ['create_db_engine', 'add_table_row', 'TableCreator', 'TableMetadata', 'SCHEMA_VERSION', 'ownertypeenum', 'dataorgenum'] diff --git a/src/dataregistry/registrar.py b/src/dataregistry/registrar.py index 34455a66..7503e991 100644 --- a/src/dataregistry/registrar.py +++ b/src/dataregistry/registrar.py @@ -15,7 +15,7 @@ if os.getenv("DREGS_ROOT_DIR"): _DEFAULT_ROOT_DIR = os.getenv("DREGS_ROOT_DIR") else: - _DEFAULT_ROOT_DIR = '/global/cfs/cdirs/desc-co/jrbogart/dregs_root' #temporary + _DEFAULT_ROOT_DIR = '/global/cfs/cdirs/desc-co/registry-beta' #temporary class Registrar(): ''' @@ -77,7 +77,7 @@ def _handle_data(self, relative_path, old_location, verbose): """ Find characteristics of dataset (i.e., is it a file or directory, how many files and total disk space of the dataset). - + If old_location is not None, copy the dataset into the data registry. Parameters @@ -99,7 +99,7 @@ def _handle_data(self, relative_path, old_location, verbose): total_size : float Total disk space of dataset in bytes """ - + # Get destination directory in data registry. dest = _form_dataset_path(self._owner_type, self._owner, relative_path, self._root_dir) @@ -248,13 +248,13 @@ def register_dataset(self, relative_path, version, old_location : str, optional Absolute location of dataset to copy. 
- If None dataset should already be at correct relative_path. + If None dataset should already be at correct relative_path. copy : bool, optional If true copy data from ``old_location`` to the database. If False create a symlink (defaults to True). is_dummy : bool True for "dummy" datasets (no data is copied, for testing purposes - only) + only) verbose : bool Provide some additional output information From cf4fe3ebc5f5764e4d6fe6739a3a4b5b2468acbb Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Mon, 3 Jul 2023 12:31:58 -0700 Subject: [PATCH 2/4] improve docstrings --- src/dataregistry/_version.py | 2 +- src/dataregistry/query.py | 14 ++++++++++++++ src/dataregistry/registrar.py | 28 +++++++++++++++++++++++++++- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/dataregistry/_version.py b/src/dataregistry/_version.py index 3ced3581..b5fdc753 100644 --- a/src/dataregistry/_version.py +++ b/src/dataregistry/_version.py @@ -1 +1 @@ -__version__ = "0.2.1" +__version__ = "0.2.2" diff --git a/src/dataregistry/query.py b/src/dataregistry/query.py index be17d053..079f98cd 100644 --- a/src/dataregistry/query.py +++ b/src/dataregistry/query.py @@ -86,6 +86,20 @@ class Query: """ def __init__(self, db_engine, dialect, schema_version=SCHEMA_VERSION): + ''' + Create a new Query object. Note this call should be preceded + by a call to create_db_engine, which will return values for + db_engine and dialect + + Parameters + ---------- + db_engine : sqlalchemy engine object + dialect : str + identifies target db type (e.g. 'postgresql') + schema_version : str + Which database schema to connect to. 
+ Current default is 'registry_beta' + ''' self._engine = db_engine self._dialect = dialect if dialect == "sqlite": diff --git a/src/dataregistry/registrar.py b/src/dataregistry/registrar.py index 7503e991..d4f77379 100644 --- a/src/dataregistry/registrar.py +++ b/src/dataregistry/registrar.py @@ -15,14 +15,36 @@ if os.getenv("DREGS_ROOT_DIR"): _DEFAULT_ROOT_DIR = os.getenv("DREGS_ROOT_DIR") else: - _DEFAULT_ROOT_DIR = '/global/cfs/cdirs/desc-co/registry-beta' #temporary + _DEFAULT_ROOT_DIR = "/global/cfs/cdirs/desc-co/registry-beta" #temporary class Registrar(): ''' Register new datasets, executions ("runs") or alias names + ''' def __init__(self, db_engine, dialect, owner_type, owner=None, schema_version=SCHEMA_VERSION): + ''' + Create a new Registrar object. Note this call should be preceded + by a call to create_db_engine, which will return values for + db_engine and dialect. + + Parameters + ---------- + db_engine : sqlalchemy engine object + dialect : str + identifies target db type (e.g. "postgresql") + owner_type : ownertypeenum + which of the allowed categories will be destination for + new dataset entries + owner : str + Forms part of relative path of dataset location. + Always "production" for production databases. + Otherwise defaults to "." + schema_version : str + Which database schema to connect to. + Current default is 'registry_beta' + ''' self._engine = db_engine self._dialect = dialect self._owner_type = owner_type.value @@ -219,6 +241,10 @@ def register_dataset(self, relative_path, version, relative_path : str Destination for the dataset within the data registry. Path is relative to ``<root_dir>/<owner_type>/<owner>``. + If the environment variable DREGS_ROOT_DIR is defined, this + value is used for ``<root_dir>``. + Otherwise currently ``<root_dir>`` defaults to + /global/cfs/cdirs/desc-co/registry-beta version : str Semantic version string of the format MAJOR.MINOR.PATCH *or* a special flag "patch", "minor" or "major". 
From fcd76e420577a41826303e323526c8b0728cc59c Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Mon, 3 Jul 2023 13:04:44 -0700 Subject: [PATCH 3/4] make "registry_beta" the default schema for db creation as well as other access --- scripts/create_registry_db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/create_registry_db.py b/scripts/create_registry_db.py index 515b696c..fdae13b8 100644 --- a/scripts/create_registry_db.py +++ b/scripts/create_registry_db.py @@ -17,7 +17,7 @@ parser = argparse.ArgumentParser(description=''' Creates dataregistry tables in specified schema and connection information (config)''', formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument('--schema', help="name of schema to contain tables. Will be created if it doesn't already exist", default="registry_dev") +parser.add_argument('--schema', help="name of schema to contain tables. Will be created if it doesn't already exist", default="registry_beta") parser.add_argument('--config', default="", help="path to config file used to establish connection") args = parser.parse_args() From 370b27d30e429b7a5c281d2a3d3445f2804f60df Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Mon, 3 Jul 2023 13:24:04 -0700 Subject: [PATCH 4/4] In create script get default schema from the correct place, don't make it up --- scripts/create_registry_db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/create_registry_db.py b/scripts/create_registry_db.py index fdae13b8..7e5d1ca1 100644 --- a/scripts/create_registry_db.py +++ b/scripts/create_registry_db.py @@ -5,7 +5,7 @@ from datetime import datetime from sqlalchemy import Column, Integer, String, DateTime, Boolean, Index, Float from sqlalchemy import ForeignKey, UniqueConstraint, Enum -from dataregistry.db_basic import create_db_engine, TableCreator, ownertypeenum, dataorgenum, add_table_row +from dataregistry.db_basic import create_db_engine, TableCreator, ownertypeenum, 
dataorgenum, add_table_row, SCHEMA_VERSION from dataregistry.git_util import get_git_info from dataregistry import __version__ @@ -17,7 +17,7 @@ parser = argparse.ArgumentParser(description=''' Creates dataregistry tables in specified schema and connection information (config)''', formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument('--schema', help="name of schema to contain tables. Will be created if it doesn't already exist", default="registry_beta") +parser.add_argument('--schema', help="name of schema to contain tables. Will be created if it doesn't already exist", default=f"{SCHEMA_VERSION}") parser.add_argument('--config', default="", help="path to config file used to establish connection") args = parser.parse_args()