Skip to content

Commit

Permalink
Merge pull request #35 from pranavanba/main
Browse files Browse the repository at this point in the history
Remove existing folders in synapse before indexing new files
  • Loading branch information
pranavanba authored Jul 9, 2024
2 parents 5f76724 + 9f69e27 commit 3dd6dc9
Showing 1 changed file with 17 additions and 1 deletion.
18 changes: 17 additions & 1 deletion scripts/main/archive-to-current.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,27 @@ if (!is.null(synFindEntityId(validated_date, config::get("PARQUET_FOLDER_ARCHIVE
# Modify cohort identifier in dir name: apply replace_equal_with_underscore()
# to every directory found under the download location (list.dirs() includes
# the top-level directory itself).
junk <-
  AWS_CURRENT_DOWNLOAD_LOCATION %>%
  list.dirs() %>%
  sapply(replace_equal_with_underscore)

# Generate manifest of existing files and remove existing folders
SYNAPSE_AUTH_TOKEN <- Sys.getenv('SYNAPSE_AUTH_TOKEN')
manifest_cmd <- glue::glue('SYNAPSE_AUTH_TOKEN="{SYNAPSE_AUTH_TOKEN}" synapse manifest --parent-id {PARQUET_FOLDER_CURRENT} --manifest ./current_manifest.tsv {AWS_CURRENT_DOWNLOAD_LOCATION}')

# Run the manifest command and stop on a non-zero exit status, so that a
# stale or missing current_manifest.tsv is never used to decide what to
# delete or upload. The command text is deliberately left out of the error
# message because it embeds the auth token.
run_manifest_cmd <- function(cmd) {
  exit_status <- system(cmd)
  if (exit_status != 0) {
    stop(
      "`synapse manifest` failed with exit status ", exit_status,
      call. = FALSE
    )
  }
  invisible(exit_status)
}

# First pass: the manifest's `parent` column gives the Synapse folders that
# currently correspond to the local directory tree.
run_manifest_cmd(manifest_cmd)

current_syn_folders <-
  read_tsv(
    file = "current_manifest.tsv",
    show_col_types = FALSE
  ) %>%
  pull(parent) %>%
  unique()

# Never delete the destination folder itself: it appears in `parent` whenever
# a file sits directly at the root of the download location, and it is still
# needed as --parent-id for the second manifest run below.
current_syn_folders <-
  setdiff(current_syn_folders, as.character(PARQUET_FOLDER_CURRENT))

# NOTE(review): if the manifest lists nested folders, deleting a folder before
# one of its descendants may make the later delete fail -- confirm the folder
# layout is flat.
syn_folders_removed <-
  lapply(current_syn_folders, function(folder_id) {
    synapser::synDelete(folder_id)
  })

# Second pass: regenerate the manifest now that the old folders are gone, so
# it refers to the newly created folders.
run_manifest_cmd(manifest_cmd)

# Get a list of all files to upload and their synapse locations (parentId)
# Start with the number of characters in the local download location path.
STR_LEN_PARQUET_FINAL_LOCATION <-
  stringr::str_length(string = AWS_CURRENT_DOWNLOAD_LOCATION)

Expand Down

0 comments on commit 3dd6dc9

Please sign in to comment.