From 4dcb6f56941fbd2e80bcf93999934490d07fd34e Mon Sep 17 00:00:00 2001 From: Pranav Anbarasu Date: Mon, 10 Jun 2024 23:38:05 +0000 Subject: [PATCH 1/2] Rename sts_synindex_external script to internal_to_external_staging to better match naming conventions of other pipeline scripts --- Dockerfile | 2 +- README.md | 2 +- .../{sts_synindex_external.R => internal_to_external_staging.R} | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename scripts/main/{sts_synindex_external.R => internal_to_external_staging.R} (99%) diff --git a/Dockerfile b/Dockerfile index 8359861..65cbee1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,4 +29,4 @@ RUN sed -i -e "s|\"\"|\"\${AWS_SYNAPSE_TOKEN}\"\n|g" \ CMD R -e "q()" \ && sed -i -e "s|\${AWS_SYNAPSE_TOKEN}|$AWS_SYNAPSE_TOKEN|g"\ /root/.aws/config \ - && Rscript /root/recover-parquet-external/sts_synindex_external.R + && Rscript /root/recover-parquet-external/scripts/main/internal_to_external_staging.R diff --git a/README.md b/README.md index ca8faa8..abea084 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ git clone https://github.com/Sage-Bionetworks/recover-parquet-external.git 2. Modify the parameters in the [config](config/config.yml) as needed 3. Run [install_requirements.R](install_requirements.R) -4. Run [sts_synindex_external.R](scripts/main/sts_synindex_external.R) to generate the external parquet datasets in the staging locations (S3 and Synapse). +4. Run [internal_to_external_staging.R](scripts/main/internal_to_external_staging.R) to generate the external parquet datasets in the staging locations (S3 and Synapse). 5. Once the datasets in the staging location have been validated, run [staging_to_archive.R](scripts/main/staging_to_archive.R) to generate the validated external parquet datasets in the date-tagged prod Archive locations (S3 and Synapse). 6. As needed, run [archive-to-current.R](scripts/main/archive-to-current.R) to update the Current Freeze version of the external parquet data in the appropriate locations (S3 and Synapse). 7. **(Optional)** Setup a scheduled job (AWS, cron, etc.) using the docker image to run the pipeline at a set frequency or when certain conditions are met diff --git a/scripts/main/sts_synindex_external.R b/scripts/main/internal_to_external_staging.R similarity index 99% rename from scripts/main/sts_synindex_external.R rename to scripts/main/internal_to_external_staging.R index 6461e9e..5b138e7 100644 --- a/scripts/main/sts_synindex_external.R +++ b/scripts/main/internal_to_external_staging.R @@ -219,7 +219,7 @@ if (nrow(synapse_fileview)>0) { # Index each file in Synapse latest_commit <- gh::gh("/repos/:owner/:repo/commits/main", owner = "Sage-Bionetworks", repo = "recover-parquet-external") -latest_commit_this_file <- paste0(latest_commit$html_url %>% stringr::str_replace("commit", "blob"), "/scripts/main/sts_synindex_external.R") +latest_commit_this_file <- paste0(latest_commit$html_url %>% stringr::str_replace("commit", "blob"), "/scripts/main/internal_to_external_staging.R") act <- synapser::Activity(name = "Indexing", description = "Indexing external parquet datasets", From 06a874e59426355c735e5f79f44f6b4bcf93aae3 Mon Sep 17 00:00:00 2001 From: Pranav Anbarasu Date: Mon, 10 Jun 2024 23:38:05 +0000 Subject: [PATCH 2/2] Rename sts_synindex_external script to internal_to_external_staging to better match naming conventions of other pipeline scripts --- Dockerfile | 2 +- README.md | 2 +- .../{sts_synindex_external.R => internal_to_external_staging.R} | 2 +- ..._synindex_external.R => test-internal_to_external_staging.R} | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename scripts/main/{sts_synindex_external.R => internal_to_external_staging.R} (99%) rename tests/testthat/{test-sts_synindex_external.R => test-internal_to_external_staging.R} (100%) diff --git a/Dockerfile b/Dockerfile index 8359861..65cbee1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,4 +29,4 @@ RUN sed -i -e "s|\"\"|\"\${AWS_SYNAPSE_TOKEN}\"\n|g" \ CMD R -e "q()" \ && sed -i -e "s|\${AWS_SYNAPSE_TOKEN}|$AWS_SYNAPSE_TOKEN|g"\ /root/.aws/config \ - && Rscript /root/recover-parquet-external/sts_synindex_external.R + && Rscript /root/recover-parquet-external/scripts/main/internal_to_external_staging.R diff --git a/README.md b/README.md index ca8faa8..abea084 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ git clone https://github.com/Sage-Bionetworks/recover-parquet-external.git 2. Modify the parameters in the [config](config/config.yml) as needed 3. Run [install_requirements.R](install_requirements.R) -4. Run [sts_synindex_external.R](scripts/main/sts_synindex_external.R) to generate the external parquet datasets in the staging locations (S3 and Synapse). +4. Run [internal_to_external_staging.R](scripts/main/internal_to_external_staging.R) to generate the external parquet datasets in the staging locations (S3 and Synapse). 5. Once the datasets in the staging location have been validated, run [staging_to_archive.R](scripts/main/staging_to_archive.R) to generate the validated external parquet datasets in the date-tagged prod Archive locations (S3 and Synapse). 6. As needed, run [archive-to-current.R](scripts/main/archive-to-current.R) to update the Current Freeze version of the external parquet data in the appropriate locations (S3 and Synapse). 7. **(Optional)** Setup a scheduled job (AWS, cron, etc.) using the docker image to run the pipeline at a set frequency or when certain conditions are met diff --git a/scripts/main/sts_synindex_external.R b/scripts/main/internal_to_external_staging.R similarity index 99% rename from scripts/main/sts_synindex_external.R rename to scripts/main/internal_to_external_staging.R index 6461e9e..5b138e7 100644 --- a/scripts/main/sts_synindex_external.R +++ b/scripts/main/internal_to_external_staging.R @@ -219,7 +219,7 @@ if (nrow(synapse_fileview)>0) { # Index each file in Synapse latest_commit <- gh::gh("/repos/:owner/:repo/commits/main", owner = "Sage-Bionetworks", repo = "recover-parquet-external") -latest_commit_this_file <- paste0(latest_commit$html_url %>% stringr::str_replace("commit", "blob"), "/scripts/main/sts_synindex_external.R") +latest_commit_this_file <- paste0(latest_commit$html_url %>% stringr::str_replace("commit", "blob"), "/scripts/main/internal_to_external_staging.R") act <- synapser::Activity(name = "Indexing", description = "Indexing external parquet datasets", diff --git a/tests/testthat/test-sts_synindex_external.R b/tests/testthat/test-internal_to_external_staging.R similarity index 100% rename from tests/testthat/test-sts_synindex_external.R rename to tests/testthat/test-internal_to_external_staging.R