Skip to content

Commit

Permalink
Fix CLI query
Browse files (browse the repository at this point in the history)
  • Loading branch information
stuartmcalpine committed Dec 13, 2024
1 parent 2fa218a commit ab36caf
Showing 1 changed file with 39 additions and 53 deletions.
92 changes: 39 additions & 53 deletions src/dataregistry_cli/query.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -53,9 +53,6 @@ def dregs_ls(args):
Can apply an "owner" and/or "owner_type" filter.
Note that the production schema will always be searched against, even if it
is not the passed `schema`.
Parameters
----------
args : argparse object
Expand Down Expand Up @@ -95,17 +92,6 @@ def dregs_ls(args):
site=args.site,
)

# Establish connection to the production schema
if datareg.db_connection.schema != args.prod_schema:
datareg_prod = DataRegistry(
config_file=args.config_file,
schema=args.prod_schema,
root_dir=args.root_dir,
site=args.site,
)
else:
datareg_prod = None

# By default, search for "our" dataset
if args.owner is None:
args.owner = os.getenv("USER")
Expand All @@ -126,42 +112,42 @@ def dregs_ls(args):
if args.keyword is not None:
_print_cols.append("keyword.keyword")

# Loop over this schema and the production schema and print the results
for this_datareg in [datareg, datareg_prod]:
if this_datareg is None:
continue

mystr = f"Schema = {this_datareg.db_connection.schema}"
print(f"\n{mystr}")
print("-" * len(mystr))

# Query
results = this_datareg.Query.find_datasets(
[x for x in _print_cols],
filters,
return_format="dataframe",
)

# Strip "dataset." from column names
new_col = {
x: x.split("dataset.")[1] for x in results.columns if "dataset." in x
}
results.rename(columns=new_col, inplace=True)

# Add compressed columns
if "owner" in results.keys():
results["type/owner"] = results["owner_type"] + "/" + results["owner"]
del results["owner"]
del results["owner_type"]

if "register_date" in results.keys():
results["register_date"] = results["register_date"].dt.date

if "keyword.keyword" in results.keys():
del results["keyword.keyword"]

# Print
with pd.option_context(
"display.max_colwidth", args.max_chars, "display.max_rows", args.max_rows
):
print(results)
mystr = (
f"Schema = {datareg.db_connection.schema} "
f"({datareg.db_connection.metadata['schema_version']})\n"
f"Production schema: {datareg.db_connection.production_schema} "
f"({datareg.db_connection.metadata['prod_schema_version']})"
)
print(f"\n{mystr}")
print("-" * len(mystr))

# Query
results = datareg.Query.find_datasets(
[x for x in _print_cols],
filters,
return_format="dataframe",
)

# Strip "dataset." from column names
new_col = {
x: x.split("dataset.")[1] for x in results.columns if "dataset." in x
}
results.rename(columns=new_col, inplace=True)

# Add compressed columns
if "owner" in results.keys():
results["type/owner"] = results["owner_type"] + "/" + results["owner"]
del results["owner"]
del results["owner_type"]

if "register_date" in results.keys():
results["register_date"] = results["register_date"].dt.date

if "keyword.keyword" in results.keys():
del results["keyword.keyword"]

# Print
with pd.option_context(
"display.max_colwidth", args.max_chars, "display.max_rows", args.max_rows
):
print(results)

0 comments on commit ab36caf

Please sign in to comment.