From 02d884f6f09671c76b5431ad04c17f032bf06c65 Mon Sep 17 00:00:00 2001
From: Pranav Anbarasu
Date: Mon, 22 Apr 2024 20:38:14 +0000
Subject: [PATCH 1/2] Use https format for s3 path in file provenance

---
 scripts/main/sts_synindex_external.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/main/sts_synindex_external.R b/scripts/main/sts_synindex_external.R
index 34040ab..4c9771c 100644
--- a/scripts/main/sts_synindex_external.R
+++ b/scripts/main/sts_synindex_external.R
@@ -218,7 +218,7 @@ latest_commit_this_file <- paste0(latest_commit$html_url %>% stringr::str_replac
 
 act <- synapser::Activity(name = "Indexing",
                           description = "Indexing external parquet datasets",
-                          used = paste0("s3://", latest_archive),
+                          used = paste0("https://s3.amazonaws.com/", latest_archive),
                           executed = latest_commit_this_file)
 
 if(nrow(synapse_manifest_to_upload) > 0){

From d1781a173e5659900ac0502458f73d59252fea70 Mon Sep 17 00:00:00 2001
From: Pranav Anbarasu
Date: Mon, 22 Apr 2024 20:45:01 +0000
Subject: [PATCH 2/2] Ensure that directories previously created by pipeline
 are deleted at the beginning

---
 scripts/main/sts_synindex_external.R | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/scripts/main/sts_synindex_external.R b/scripts/main/sts_synindex_external.R
index 4c9771c..33362a3 100644
--- a/scripts/main/sts_synindex_external.R
+++ b/scripts/main/sts_synindex_external.R
@@ -91,6 +91,11 @@ replace_equal_with_underscore <- function(directory_path) {
 
 synapser::synLogin(authToken = Sys.getenv('SYNAPSE_AUTH_TOKEN'))
 config::get(config = "staging") %>% list2env(envir = .GlobalEnv)
+unlink(x = c(AWS_PARQUET_DOWNLOAD_LOCATION,
+             AWS_ARCHIVE_DOWNLOAD_LOCATION,
+             PARQUET_FINAL_LOCATION),
+       recursive = TRUE,
+       force = TRUE)
 
 # Get STS credentials for input data bucket -------------------------------
 token <-