Skip to content

Commit

Permalink
fix CLI and cleanup code and add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mishaschwartz committed Oct 9, 2024
1 parent d758a83 commit eb5b900
Show file tree
Hide file tree
Showing 13 changed files with 291 additions and 245 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
* Adding an end date to `CMIP6_UofT`'s temporal extent for better rendering in STAC Browser
* Updates to datacube extension helper routines for `CMIP6_UofT`.
* Make pyessv-archive a requirement for *only* the cmip6 implementation instead of for the whole CLI
* Fix bug where logger setup failed
* Simplify CLI argument constructor code (for cleaner and more testable code)
* Add tests for CLI and implementations when invoked through the CLI
* Move code dealing with requests and authentication to the `requests.py` file
* Add `--log_file` command line option to specify a non-default location to write log files to

## [0.6.0](https://github.com/crim-ca/stac-populator/tree/0.6.0) (2024-02-22)

Expand Down
226 changes: 38 additions & 188 deletions STACpopulator/cli.py
Original file line number Diff line number Diff line change
@@ -1,217 +1,67 @@
import argparse
import glob
import functools
import importlib
import logging
import os
import sys
from types import ModuleType
import warnings
from datetime import datetime
from http import cookiejar
from typing import Callable, Optional
from datetime import datetime, timezone
from typing import Callable

import requests
from requests.auth import AuthBase, HTTPBasicAuth, HTTPDigestAuth, HTTPProxyAuth
from requests.sessions import Session

from STACpopulator import __version__
from STACpopulator import __version__, implementations
from STACpopulator.exceptions import STACPopulatorError
from STACpopulator.logging import setup_logging

# Registry of populator implementations, keyed by populator name. Each entry is a
# dict with the keys "name", "caller", "parser" and "runner".
POPULATORS = {}


class HTTPBearerTokenAuth(AuthBase):
    """Auth callable that adds a bearer-token ``Authorization`` header to a prepared request."""

    def __init__(self, token: str) -> None:
        """Remember the token that will be placed in the ``Authorization`` header."""
        self._token = token

    def __call__(self, r: requests.PreparedRequest) -> requests.PreparedRequest:
        """Set ``Authorization: Bearer <token>`` on *r* and return the same request object."""
        bearer_value = f"Bearer {self._token}"
        r.headers["Authorization"] = bearer_value
        return r


class HTTPCookieAuth(cookiejar.MozillaCookieJar):
    r"""
    Cookie-jar file used for authorization.

    Examples of commands that write such a cookie-jar file:

    .. code-block:: shell

        curl --cookie-jar /path/to/cookie-jar.txt [authorization-provider-arguments]

        curl \
            -k \
            -X POST \
            --cookie-jar /tmp/magpie-cookie.txt \
            -d '{"user_name":"...","password":"..."}' \
            -H 'Accept:application/json' \
            -H 'Content-Type:application/json' \
            'https://{hostname}/magpie/signin'

    .. note::
        Due to implementation details with :mod:`requests`, this must be passed directly to the ``cookies``
        attribute rather than ``auth`` as in the case for other authorization handlers.
    """


def add_request_options(parser: argparse.ArgumentParser) -> None:
    """
    Add the request-session options (SSL verification, client certificate, authentication) to *parser*.

    The parsed namespace gains the attributes ``verify`` (bool, defaults to ``True``), ``cert`` (path string or
    ``None``), ``auth_handler`` and ``auth_identity`` (strings or ``None``).

    :param parser: Parser (or sub-parser) that receives the additional arguments.
    """

    def existing_file(value: str) -> str:
        """Validate that *value* names an existing file and return it unchanged."""
        if not os.path.isfile(value):
            raise argparse.ArgumentTypeError(f"can't open '{value}': no such file")
        return value

    parser.add_argument(
        "--no-verify",
        "--no-ssl",
        "--no-ssl-verify",
        dest="verify",
        action="store_false",
        help="Disable SSL verification (not recommended unless for development/test servers).",
    )
    # The certificate is kept as a *path*: requests expects a path (or a pair of paths) for ``Session.cert``.
    # ``argparse.FileType()`` would instead open the file and hand over a file object that is never closed.
    parser.add_argument("--cert", type=existing_file, required=False, help="Path to a certificate file to use.")
    parser.add_argument(
        "--auth-handler",
        choices=["basic", "digest", "bearer", "proxy", "cookie"],
        required=False,
        help="Authentication strategy to employ for the requests session.",
    )
    parser.add_argument(
        "--auth-identity",
        required=False,
        help="Bearer token, cookie-jar file or proxy/digest/basic username:password for selected authorization handler.",
    )


def apply_request_options(session: Session, namespace: argparse.Namespace) -> None:
    """
    Apply the request options found in *namespace* to *session*.

    ``verify`` and ``cert`` are always copied onto the session. When ``auth_handler`` is one of the supported
    handlers, the matching authentication object is built from ``auth_identity``: it is set on ``session.auth``
    for basic/digest/proxy/bearer, or loaded into ``session.cookies`` for cookie. Any other ``auth_handler``
    value (including ``None``) leaves the session authentication untouched.

    :param session: Session to configure in place.
    :param namespace: Parsed arguments providing ``verify``, ``cert``, ``auth_handler`` and ``auth_identity``.
    :raises ValueError: If a handler is selected without an identity, or if a basic/digest/proxy identity is
        not of the form ``username:password``.
    """
    session.verify = namespace.verify
    session.cert = namespace.cert

    credential_auth = {"basic": HTTPBasicAuth, "digest": HTTPDigestAuth, "proxy": HTTPProxyAuth}
    handler = namespace.auth_handler
    if handler not in {*credential_auth, "bearer", "cookie"}:
        return  # no (or unrecognised) handler: nothing more to configure

    identity = namespace.auth_identity
    if not identity:
        # Fail with an actionable message instead of an AttributeError on ``None.split``.
        raise ValueError(f"An authentication identity is required for the '{handler}' authentication handler.")

    if handler in credential_auth:
        # Split on the first ":" only, so that passwords may themselves contain colons.
        usr, separator, pwd = identity.partition(":")
        if not separator:
            # The identity is deliberately left out of the message: it may contain a secret.
            raise ValueError(
                f"The authentication identity for the '{handler}' handler must be of the form 'username:password'."
            )
        session.auth = credential_auth[handler](usr, pwd)
    elif handler == "bearer":
        session.auth = HTTPBearerTokenAuth(identity)
    else:  # cookie
        session.cookies = HTTPCookieAuth(identity)
        session.cookies.load(identity)


def make_main_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(prog="stac-populator", description="STACpopulator operations.")
def add_parser_args(parser: argparse.ArgumentParser) -> dict[str, Callable]:
parser.add_argument(
"--version",
"-V",
action="version",
version=f"%(prog)s {__version__}",
help="prints the version of the library and exits",
)
commands = parser.add_subparsers(title="command", dest="command", description="STAC populator command to execute.")

run_cmd_parser = make_run_command_parser(parser.prog)
commands.add_parser(
"run",
prog=f"{parser.prog} {run_cmd_parser.prog}",
parents=[run_cmd_parser],
formatter_class=run_cmd_parser.formatter_class,
usage=run_cmd_parser.usage,
add_help=False,
help=run_cmd_parser.description,
description=run_cmd_parser.description,
parser.add_argument("--debug", action="store_const", const=logging.DEBUG, help="set logger level to debug")
parser.add_argument(
"--log_file", help="file to write log output to. By default logs will be written to the current directory."
)
commands_subparser = parser.add_subparsers(
title="command", dest="command", description="STAC populator command to execute.", required=True
)
run_parser = commands_subparser.add_parser("run", description="Run a STACpopulator implementation")
populators_subparser = run_parser.add_subparsers(
title="populator", dest="populator", description="Implementation to run."
)
for implementation_module_name, module in implementation_modules().items():
implementation_parser = populators_subparser.add_parser(implementation_module_name)
module.add_parser_args(implementation_parser)

# add more commands as needed...
parser.add_argument("--debug", action="store_true", help="Set logger level to debug")

return parser


def make_run_command_parser(parent) -> argparse.ArgumentParser:
"""
Groups all sub-populator CLI listed in :py:mod:`STACpopulator.implementations` as a common ``stac-populator`` CLI.
Dispatches the provided arguments to the appropriate sub-populator CLI as requested. Each sub-populator CLI must
implement functions ``make_parser`` and ``main`` to generate the arguments and dispatch them to the corresponding
caller. The ``main`` function should accept a sequence of string arguments, which can be passed to the parser
obtained from ``make_parser``.

An optional ``runner`` can also be defined in each populator module. If provided, the namespace arguments that have
already been parsed to resolve the populator to run will be used directly, avoiding parsing arguments twice.
"""
parser = argparse.ArgumentParser(prog="run", description="STACpopulator implementation runner.")
subparsers = parser.add_subparsers(title="populator", dest="populator", description="Implementation to run.")
populators_impl = "implementations"
populators_dir = os.path.join(os.path.dirname(__file__), populators_impl)
populator_mods = glob.glob(f"{populators_dir}/**/[!__init__]*.py", recursive=True) # potential candidate scripts
for populator_path in sorted(populator_mods):
populator_script = populator_path.split(populators_dir, 1)[1][1:]
populator_py_mod = os.path.splitext(populator_script)[0].replace(os.sep, ".")
populator_name, pop_mod_file = populator_py_mod.rsplit(".", 1)
populator_root = f"STACpopulator.{populators_impl}.{populator_name}"
pop_mod_file_loc = f"{populator_root}.{pop_mod_file}"
@functools.cache
def implementation_modules() -> dict[str, ModuleType]:
modules = {}
for implementation_module_name in implementations.__all__:
try:
populator_module = importlib.import_module(pop_mod_file_loc, populator_root)
except STACPopulatorError as e:
warnings.warn(f"Could not load extension {populator_name} because of error {e}")
continue
parser_maker: Callable[[], argparse.ArgumentParser] = getattr(populator_module, "make_parser", None)
populator_runner = getattr(populator_module, "runner", None) # optional, call main directly if not available
populator_caller = getattr(populator_module, "main", None)
if callable(parser_maker) and callable(populator_caller):
populator_parser = parser_maker()
populator_prog = f"{parent} {parser.prog} {populator_name}"
subparsers.add_parser(
populator_name,
prog=populator_prog,
parents=[populator_parser],
formatter_class=populator_parser.formatter_class,
add_help=False, # add help disabled otherwise conflicts with this main populator help
help=populator_parser.description,
description=populator_parser.description,
usage=populator_parser.usage,
modules[implementation_module_name] = importlib.import_module(
f".{implementation_module_name}", implementations.__package__
)
POPULATORS[populator_name] = {
"name": populator_name,
"caller": populator_caller,
"parser": populator_parser,
"runner": populator_runner,
}
return parser
except STACPopulatorError as e:
warnings.warn(f"Could not load extension {implementation_module_name} because of error {e}")
return modules


def main(*args: str) -> Optional[int]:
parser = make_main_parser()
args = args or sys.argv[1:] # same as was parse args does, but we must provide them to subparser
ns = parser.parse_args(args=args) # if 'command' or 'populator' unknown, auto prints the help message with exit(2)
params = vars(ns)
populator_cmd = params.pop("command")
if not populator_cmd:
parser.print_help()
return 0
result = None
if populator_cmd == "run":
populator_name = params.pop("populator")
def run(ns: argparse.Namespace) -> int:
    """
    Execute the command selected in the parsed arguments.

    For the ``run`` command, logging is set up first. The log goes to ``ns.log_file`` when given, otherwise to
    ``<populator>_log_<UTC timestamp>Z.jsonl``. The level is ``ns.debug`` when set, otherwise ``logging.INFO``.
    The selected implementation's ``runner`` is then called with the namespace.

    :param ns: Parsed arguments providing ``command``, ``populator``, ``log_file`` and ``debug``.
    :return: The value returned by the implementation's ``runner``, or ``0`` when that value is falsy.
        Any command other than ``run`` falls through and returns ``None``.
    """
    if ns.command == "run":
        # ``isoformat()`` of an aware datetime already ends in "+00:00". Drop the tzinfo before formatting so
        # the "Z" suffix is the only UTC designator, instead of the malformed "...+00:00Z".
        timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
        logfile_name = ns.log_file or f"{ns.populator}_log_{timestamp}.jsonl"
        setup_logging(logfile_name, ns.debug or logging.INFO)
        return implementation_modules()[ns.populator].runner(ns) or 0

# Setup the application logger:
fname = f"{populator_name}_log_{datetime.utcnow().isoformat() + 'Z'}.jsonl"
log_level = logging.DEBUG if ns.debug else logging.INFO
setup_logging(fname, log_level)

if not populator_name:
parser.print_help()
return 0
populator_args = args[2:] # skip [command] [populator]
populator_caller = POPULATORS[populator_name]["caller"]
populator_runner = POPULATORS[populator_name]["runner"]
if populator_runner:
result = populator_runner(ns)
else:
result = populator_caller(*populator_args)
return 0 if result is None else result
def main(*args: str) -> int:
    """
    Entry point of the command line interface.

    :param args: Command line arguments. When none are given, ``None`` is handed to the parser so that it
        reads the process arguments instead.
    :return: The result of :func:`run` for the parsed arguments.
    """
    cli_parser = argparse.ArgumentParser()
    add_parser_args(cli_parser)
    namespace = cli_parser.parse_args(args if args else None)
    return run(namespace)


if __name__ == "__main__":
Expand Down
3 changes: 3 additions & 0 deletions STACpopulator/implementations/CMIP6_UofT/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .add_CMIP6 import add_parser_args, runner

__all__ = ["add_parser_args", "runner"]
25 changes: 13 additions & 12 deletions STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
import json
import logging
import os
from typing import Any, MutableMapping, NoReturn, Optional, Union
import sys
from typing import Any, MutableMapping, Optional, Union

from pystac import STACValidationError
from pystac.extensions.datacube import DatacubeExtension
from requests.sessions import Session

from STACpopulator.cli import add_request_options, apply_request_options
from STACpopulator.requests import add_request_options, apply_request_options
from STACpopulator.extensions.cmip6 import CMIP6Helper, CMIP6Properties
from STACpopulator.extensions.datacube import DataCubeHelper
from STACpopulator.extensions.thredds import THREDDSExtension, THREDDSHelper
Expand Down Expand Up @@ -78,17 +79,17 @@ def create_stac_item(

try:
item.validate()
except STACValidationError:
except STACValidationError as e:
raise Exception("Failed to validate STAC item") from e

# print(json.dumps(item.to_dict()))
return json.loads(json.dumps(item.to_dict()))


def make_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description="CMIP6 STAC populator from a THREDDS catalog or NCML XML.")
parser.add_argument("stac_host", type=str, help="STAC API address")
parser.add_argument("href", type=str, help="URL to a THREDDS catalog or a NCML XML with CMIP6 metadata.")
def add_parser_args(parser: argparse.ArgumentParser) -> None:
parser.description="CMIP6 STAC populator from a THREDDS catalog or NCML XML."
parser.add_argument("stac_host", help="STAC API URL")
parser.add_argument("href", help="URL to a THREDDS catalog or a NCML XML with CMIP6 metadata.")
parser.add_argument("--update", action="store_true", help="Update collection and its items")
parser.add_argument(
"--mode",
Expand All @@ -105,10 +106,9 @@ def make_parser() -> argparse.ArgumentParser:
),
)
add_request_options(parser)
return parser


def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn:
def runner(ns: argparse.Namespace) -> int:
LOGGER.info(f"Arguments to call: {vars(ns)}")

with Session() as session:
Expand All @@ -123,13 +123,14 @@ def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn:
ns.stac_host, data_loader, update=ns.update, session=session, config_file=ns.config, log_debug=ns.debug
)
c.ingest()
return 0


def main(*args: str) -> Optional[int]:
parser = make_parser()
def main(*args: str) -> int:
    """
    Standalone entry point for this populator implementation.

    :param args: Command line arguments. When none are given, ``None`` is handed to the parser so that it
        reads the process arguments instead.
    :return: The value returned by :func:`runner`.
    """
    parser = argparse.ArgumentParser()
    # Register this implementation's arguments. A bare parser would reject every argument and hand an empty
    # namespace to the runner.
    add_parser_args(parser)
    ns = parser.parse_args(args or None)
    return runner(ns)


if __name__ == "__main__":
main()
sys.exit(main())
3 changes: 3 additions & 0 deletions STACpopulator/implementations/DirectoryLoader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .crawl_directory import add_parser_args, runner

__all__ = ["add_parser_args", "runner"]
Loading

0 comments on commit eb5b900

Please sign in to comment.