From 3412ebe283577cbdf742076d8bc0693a6c4ccfa7 Mon Sep 17 00:00:00 2001
From: Pranav Anbarasu <pranavanba@gmail.com>
Date: Wed, 12 Jun 2024 22:27:23 +0000
Subject: [PATCH] Update regex applied to dictionary CSV file names

---
 scripts/deidentification/deidentification.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/deidentification/deidentification.R b/scripts/deidentification/deidentification.R
index b7070ad..9e92e60 100644
--- a/scripts/deidentification/deidentification.R
+++ b/scripts/deidentification/deidentification.R
@@ -10,10 +10,11 @@ unlink('./dictionaries/', recursive = T, force = T)
 # Get dictionaries --------------------------------------------------------
 system('synapse get -r syn52316269 --downloadLocation ./dictionaries/ --manifest suppress')
 
-list.files("./dictionaries", full.names = T) %>% lapply(function(x) {
-  y <- x %>% stringr::str_remove_all("[0-9]")
-  file.rename(from = x, to = y)
-})
+list.files("./dictionaries", full.names = T) %>% 
+  lapply(function(x) {
+    y <- x %>% stringr::str_remove_all("[0-9]|_[0-9]+")
+    file.rename(from = x, to = y)
+  })
 
 junk <- lapply(list.files("./dictionaries/", full.names = T), function(f) {
   lines <- readLines(f)
@@ -21,7 +22,6 @@ junk <- lapply(list.files("./dictionaries/", full.names = T), function(f) {
   modified_lines <- lapply(lines, function(line) {
     line <- gsub('"', '', line)
     if (grepl(",APPROVED|,UNAPPROVED", line)) {
-      # line <- gsub("^(.*?)(,APPROVED|,approved|,UNAPPROVED|,unapproved)", '"\\1"\\2', line)
       line <- gsub('(.*?)"?(,APPROVED|,approved|,UNAPPROVED|,unapproved)', '"\\1"\\2', line)
     }
     return(line)