From 0593655fae6e1929db36d5e412f3bcb9ae07c140 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Jun 2024 14:02:27 +1000 Subject: [PATCH 1/3] deleted transfer command --- xnat_ingest/cli/__init__.py | 1 - xnat_ingest/cli/transfer.py | 273 ------------------------------------ 2 files changed, 274 deletions(-) delete mode 100644 xnat_ingest/cli/transfer.py diff --git a/xnat_ingest/cli/__init__.py b/xnat_ingest/cli/__init__.py index b2fc2a8..7d93c7b 100644 --- a/xnat_ingest/cli/__init__.py +++ b/xnat_ingest/cli/__init__.py @@ -1,4 +1,3 @@ from .base import cli from .upload import upload from .stage import stage -from .transfer import transfer diff --git a/xnat_ingest/cli/transfer.py b/xnat_ingest/cli/transfer.py deleted file mode 100644 index 39dcb0d..0000000 --- a/xnat_ingest/cli/transfer.py +++ /dev/null @@ -1,273 +0,0 @@ -import subprocess as sp -import typing as ty -from pathlib import Path -import tempfile -import shutil -import click -from tqdm import tqdm -from arcana.xnat import Xnat -from ..utils import ( - logger, - LogFile, - LogEmail, - StoreCredentials, - XnatLogin, - MailServer, - set_logger_handling, -) -from .base import cli - - -@cli.command( - help="""transfers data from a staging directory to an intermediate remote store, -such as an S3 bucket or a remote server accessible via SSH, before they are finally -uploaded to XNAT. - -STAGING_DIR is the directory that the files for each session are collated to - -REMOTE_STORE is location of the remote store to transfer the data to. This can be an -AWS S3 bucket or a remote server accessible via SSH. The format of the remote store -is determined by the prefix of the path. For example, a path starting with 's3://' is -interpreted as an S3 bucket, while a path starting with 'xxxx@xxxx:' is interpreted as -an SSH server. -""", -) -@click.argument( - "staging_dir", type=click.Path(path_type=Path), envvar="XNAT_INGEST_STAGE_DIR" -) -@click.argument("remote_store", type=str, envvar="XNAT_INGEST_TRANSFER_REMOTE_STORE") -@click.option( - "--store-credentials", - type=StoreCredentials.cli_type, - metavar=" ", - envvar="XNAT_INGEST_TRANSFER_STORE_CREDENTIALS", - default=None, - nargs=2, - help="Credentials to use to access of data stored in remote stores (e.g. AWS S3)", -) -@click.option( - "--log-level", - default="info", - type=str, - envvar="XNAT_INGEST_TRANSFER_LOGLEVEL", - help=("The level of the logging printed to stdout"), -) -@click.option( - "--log-file", - "log_files", - default=None, - type=LogFile.cli_type, - nargs=2, - multiple=True, - metavar=" ", - envvar="XNAT_INGEST_TRANSFER_LOGFILE", - help=( - 'Location to write the output logs to, defaults to "upload-logs" in the ' - "export directory" - ), -) -@click.option( - "--log-email", - "log_emails", - type=LogEmail.cli_type, - nargs=3, - metavar="
", - multiple=True, - envvar="XNAT_INGEST_TRANSFER_LOGEMAIL", - help=( - "Email(s) to send logs to. When provided in an environment variable, " - "mail and log level are delimited by ',' and separate destinations by ';'" - ), -) -@click.option( - "--mail-server", - type=MailServer.cli_type, - metavar=" ", - default=None, - envvar="XNAT_INGEST_TRANSFER_MAILSERVER", - help=( - "the mail server to send logger emails to. When provided in an environment variable, " - "args are delimited by ';'" - ), -) -@click.option( - "--delete/--dont-delete", - default=False, - envvar="XNAT_INGEST_TRANSFER_DELETE", - help="Whether to delete the session directories after they have been uploaded or not", -) -@click.option( - "--raise-errors/--dont-raise-errors", - default=False, - type=bool, - help="Whether to raise errors instead of logging them (typically for debugging)", -) -@click.option( - "--xnat-login", - nargs=3, - type=XnatLogin.cli_type, - default=None, - metavar=" ", - help="The XNAT server to upload to plus the user and password to use", - envvar="XNAT_INGEST_TRANSFER_XNAT_LOGIN", -) -@click.option( - "--require-manifest/--dont-require-manifest", - default=False, - help="Whether to require a manifest file to be present in the session directory", - envvar="XNAT_INGEST_TRANSFER_REQUIRE_MANIFEST", -) -def transfer( - staging_dir: Path, - remote_store: str, - store_credentials: ty.Optional[StoreCredentials], - log_files: ty.List[LogFile], - log_level: str, - log_emails: ty.List[LogEmail], - mail_server: MailServer, - delete: bool, - raise_errors: bool, - xnat_login: ty.Optional[XnatLogin], - require_manifest: bool, -): - - if not staging_dir.exists(): - raise ValueError(f"Staging directory '{staging_dir}' does not exist") - - set_logger_handling( - log_level=log_level, - log_files=log_files, - log_emails=log_emails, - mail_server=mail_server, - ) - - if remote_store.startswith("s3://"): - store_type = "s3" - elif "@" in remote_store: - store_type = "ssh" - else: - raise ValueError( - f"Remote store {remote_store} is not a valid remote store. " - "It should be an S3 bucket or an SSH server" - ) - - if xnat_login is not None: - xnat_repo = Xnat( - server=xnat_login.host, - user=xnat_login.user, - password=xnat_login.password, - cache_dir=Path(tempfile.mkdtemp()), - ) - else: - xnat_repo = None - - for project_dir in tqdm( - list(staging_dir.iterdir()), - f"Transferring projects to remote store {remote_store}", - ): - if project_dir.name.startswith("UNKNOWN"): - logger.error( - "Project %s is not recognised and will not be transferred, please " - "rename manually and transfer again", - project_dir.name, - ) - continue - if xnat_repo: - with xnat_repo.connection: - try: - xnat_repo.connection.projects[project_dir.name] - except KeyError: - logger.error( - "Project %s does not exist on XNAT. Please rename the directory " - "to match the project ID on XNAT", - project_dir.name, - ) - continue - for subject_dir in tqdm( - list(project_dir.iterdir()), - f"Transferring subjects for {project_dir.name} project", - ): - if subject_dir.name.startswith("UNKNOWN"): - logger.error( - "Subject % in project %s is not recognised and will not be " - "transferred, please rename manually and transfer again", - subject_dir.name, - project_dir.name, - ) - continue - for session_dir in tqdm( - list(subject_dir.iterdir()), - f"Transferring sessions for {project_dir.name}:{subject_dir.name} subject", - ): - if session_dir.name.startswith("UNKNOWN"): - logger.error( - "Session %s in subject %s in project %s is not recognised and " - "will not be transferred, please rename manually and transfer again", - session_dir.name, - subject_dir.name, - project_dir.name, - ) - continue - if require_manifest and not (session_dir / "MANIFEST.yaml").exists(): - logger.warning( - "Session %s in subject %s in project %s does not contain a " - "'MANIFEST.yaml' and is therefore considered be incomplete and " - "will not be synced", - session_dir.name, - subject_dir.name, - project_dir.name, - ) - continue - remote_path = ( - remote_store - + "/" - + project_dir.name - + "/" - + subject_dir.name - + "/" - + session_dir.name - ) - if store_type == "s3": - logger.debug( - "Transferring %s to S3 (%s)", session_dir, remote_store - ) - aws_cmd = ( - sp.check_output("which aws", shell=True).strip().decode("utf-8") - ) - if store_credentials is None: - raise ValueError( - "No store credentials provided for S3 bucket transfer" - ) - process = sp.Popen( - [ - aws_cmd, - "s3", - "sync", - "--quiet", - str(session_dir), - remote_path, - ], - env={ - "AWS_ACCESS_KEY_ID": store_credentials.access_key, - "AWS_SECRET_ACCESS_KEY": store_credentials.access_secret, - }, - stdout=sp.PIPE, - stderr=sp.PIPE, - ) - stdout, stderr = process.communicate() - if process.returncode != 0: - raise RuntimeError("AWS sync failed: " + stderr.decode("utf-8")) - elif store_type == "ssh": - logger.debug( - "Transferring %s to %s via SSH", session_dir, remote_store - ) - sp.check_call(["rsync", "--quiet", str(session_dir), remote_path]) - else: - assert False - if delete: - logger.info("Deleting %s after successful upload", session_dir) - shutil.rmtree(session_dir) - - -if __name__ == "__main__": - transfer() From 5b951d109e50a7915bbe8ad103f667739ec53120 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Jun 2024 14:02:27 +1000 Subject: [PATCH 2/3] added verify-ssl option to upload tool --- pyproject.toml | 4 ++-- xnat_ingest/cli/upload.py | 14 +++++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f3b593c..042b63d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ dependencies = [ "paramiko", "xnat", "arcana", - "arcana-xnat", + "arcana-xnat >=0.4.1", ] license = { file = "LICENSE" } authors = [{ name = "Thomas G. Close", email = "thomas.close@sydney.edu.au" }] @@ -47,7 +47,7 @@ dev = ["black", "pre-commit", "codespell", "flake8", "flake8-pyproject"] test = [ "pytest >=6.2.5", "pytest-env>=0.6.2", - "pytest-cov>=2.12.1", + "pytest-cov>=2.12.1", "xnat4tests >=0.3", "medimages4tests >=0.3.1", "PyYAML", diff --git a/xnat_ingest/cli/upload.py b/xnat_ingest/cli/upload.py index c1901e8..52a0b34 100644 --- a/xnat_ingest/cli/upload.py +++ b/xnat_ingest/cli/upload.py @@ -154,6 +154,13 @@ default=0, help="The number of days to keep files in the remote store for", ) +@click.option( + "--verify-ssl/--dont-verify-ssl", + type=bool, + default=False, + envvar="XNAT_INGEST_UPLOAD_VERIFY_SSL", + help="Whether to verify the SSL certificate of the XNAT server", +) def upload( staged: str, server: str, @@ -170,6 +177,7 @@ def upload( temp_dir: ty.Optional[Path], use_manifest: bool, clean_up_older_than: int, + verify_ssl: bool, ): set_logger_handling( @@ -182,7 +190,11 @@ def upload( tempfile.tempdir = str(temp_dir) xnat_repo = Xnat( - server=server, user=user, password=password, cache_dir=Path(tempfile.mkdtemp()) + server=server, + user=user, + password=password, + cache_dir=Path(tempfile.mkdtemp()), + verify_ssl=verify_ssl, ) with xnat_repo.connection: From 9881ea49cfa67c2f5d7708f678498cb4b5892d18 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Jun 2024 14:05:40 +1000 Subject: [PATCH 3/3] strip 'v' from docker tag in ci-cd --- .github/workflows/ci-cd.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index 8ece5d9..29b185c 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -103,6 +103,7 @@ jobs: id: versions run: | VERSION=$(git describe --tags --abbrev=0) + VERSION=${VERSION#v} IMAGE=${{ env.REGISTRY }}/$(echo "${{ env.IMAGE_NAME }}" | awk '{print tolower($0)}') echo "IMAGE=$IMAGE" >> $GITHUB_OUTPUT echo "TAG=$IMAGE:$VERSION" >> $GITHUB_OUTPUT