From 547a60cb853182aa5296983b9bb1582e7ffddce3 Mon Sep 17 00:00:00 2001
From: Pranav Anbarasu
Date: Wed, 7 Feb 2024 18:47:02 +0000
Subject: [PATCH] Increase efficiency and reduce runtime by syncing only the validated staging folders instead of entire bucket

---
 staging_to_archive.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/staging_to_archive.R b/staging_to_archive.R
index 76c3d0f..0426d4a 100644
--- a/staging_to_archive.R
+++ b/staging_to_archive.R
@@ -46,12 +46,12 @@ if (!is.null(synFindEntityId(validated_date, config::get("PARQUET_FOLDER_ARCHIVE

   sync_cmd <- glue::glue("aws s3 --profile service-catalog sync {STAGING_TO_ARCHIVE_DOWNLOAD_LOCATION} {base_s3_uri_archive}{validated_date}/ --exclude '*owner.txt*' --exclude '*archive*'")
   system(sync_cmd)
-  rm(sync_cmd, validated_date)
+  rm(sync_cmd)

-  # Sync entire bucket to local
+  # Sync new date dir in archive bucket to local
   unlink(STAGING_TO_ARCHIVE_DOWNLOAD_LOCATION, recursive = T, force = T)
   unlink(AWS_ARCHIVE_DOWNLOAD_LOCATION, recursive = T, force = T)
-  sync_cmd <- glue::glue('aws s3 --profile service-catalog sync {base_s3_uri_archive} {AWS_ARCHIVE_DOWNLOAD_LOCATION} --exclude "*owner.txt*" --exclude "*archive*"')
+  sync_cmd <- glue::glue('aws s3 --profile service-catalog sync {base_s3_uri_archive}{validated_date}/ {AWS_ARCHIVE_DOWNLOAD_LOCATION}/{validated_date}/ --exclude "*owner.txt*" --exclude "*archive*"')
   system(sync_cmd)

   # Modify cohort identifier in dir name