diff --git a/sts_synindex_external.R b/sts_synindex_external.R
index b8f1e80..a43b618 100644
--- a/sts_synindex_external.R
+++ b/sts_synindex_external.R
@@ -209,26 +209,38 @@ synapse_manifest_to_upload <-
                 s3_file_key = gsub("cohort_", "cohort=", s3_file_key))
 
 # Index each file in Synapse
+# Look up the commit that `main` points to in the recover-parquet-external repo;
+# its URL is recorded below as the code that executed this indexing run.
+latest_commit <- gh::gh("/repos/:owner/:repo/commits/main", owner = "Sage-Bionetworks", repo = "recover-parquet-external")
+# Turn the commit page URL (.../commit/<sha>) into the matching tree URL.
+latest_commit_tree_url <- latest_commit$html_url %>% stringr::str_replace(stringr::fixed("/commit/"), "/tree/")
+
 if(nrow(synapse_manifest_to_upload) > 0){
   for(file_number in seq_len(nrow(synapse_manifest_to_upload))){
     tmp <- synapse_manifest_to_upload[file_number, c("path", "parent", "s3_file_key")]
     absolute_file_path <- tools::file_path_as_absolute(tmp$path)
-    
+
     temp_syn_obj <- synapser::synCreateExternalS3FileHandle(
       bucket_name = PARQUET_BUCKET_EXTERNAL,
       s3_file_key = tmp$s3_file_key,
       file_path = absolute_file_path,
       parent = tmp$parent)
-    
+
     new_fileName <- stringr::str_replace_all(temp_syn_obj$fileName, ':', '_colon_')
-    
-    f <-
-      synStore(
-        File(dataFileHandleId = temp_syn_obj$id,
-             parentId = tmp$parent,
-             name = new_fileName))
-    
+
+    f <- File(dataFileHandleId = temp_syn_obj$id,
+              parentId = tmp$parent,
+              name = new_fileName)
+
+    # Attach provenance. The name must go in activityName: synStore's
+    # activity argument expects an Activity object, not a string.
+    f <- synStore(f,
+                  activityName = "Indexing",
+                  activityDescription = "Indexing external parquet datasets",
+                  used = PARQUET_FOLDER_INTERNAL,
+                  executed = latest_commit_tree_url)
+
   }
 }