Skip to content

Commit

Permalink
Merge pull request #33 from pranavanba/main
Browse files (browse the repository at this point in the history)
Update regex applied to dictionary csv file name in deidentification step
  • Loading branch information
pranavanba authored Jun 12, 2024
2 parents 9d5d1d1 + 3412ebe commit d8d4ee9
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions scripts/deidentification/deidentification.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ unlink('./dictionaries/', recursive = T, force = T)
# Get dictionaries --------------------------------------------------------
# Shell out to the `synapse` command-line client to fetch Synapse entity
# syn52316269 into ./dictionaries/.
# NOTE(review): `-r` presumably requests a recursive download and
# `--manifest suppress` presumably prevents a manifest file from being
# written -- confirm against the Synapse CLI documentation.
# The status value returned by system() is not inspected here.
system('synapse get -r syn52316269 --downloadLocation ./dictionaries/ --manifest suppress')

# NOTE(review): this looks like the pre-change side of the diff (the commit
# reports 5 additions and 5 deletions); the same statement with the updated
# regex follows directly below -- confirm which one belongs in the file.
# Renames every file listed in ./dictionaries by deleting all digit
# characters from its path string.
list.files("./dictionaries", full.names = T) %>% lapply(function(x) {
# Strip each individual digit (0-9) from the full path returned by list.files
y <- x %>% stringr::str_remove_all("[0-9]")
# Move the file to the digit-free path
file.rename(from = x, to = y)
})
# Normalise the downloaded dictionary file names: drop every digit, together
# with an underscore that immediately precedes a run of digits
# (e.g. "name_123.csv" -> "name.csv", "name2.csv" -> "name.csv").
#
# The pattern is applied to the file name only (basename), never to the
# directory part, so a digit anywhere in the directory path cannot corrupt the
# rename target. The list of file.rename() results is wrapped in invisible()
# so it is not auto-printed when the script runs at top level.
invisible(
  lapply(list.files("./dictionaries", full.names = TRUE), function(path) {
    clean_name <- stringr::str_remove_all(basename(path), "[0-9]|_[0-9]+")
    # Rebuild the destination inside the file's original directory
    file.rename(from = path, to = file.path(dirname(path), clean_name))
  })
)

junk <- lapply(list.files("./dictionaries/", full.names = T), function(f) {
lines <- readLines(f)

modified_lines <- lapply(lines, function(line) {
line <- gsub('"', '', line)
if (grepl(",APPROVED|,UNAPPROVED", line)) {
# line <- gsub("^(.*?)(,APPROVED|,approved|,UNAPPROVED|,unapproved)", '"\\1"\\2', line)
line <- gsub('(.*?)"?(,APPROVED|,approved|,UNAPPROVED|,unapproved)', '"\\1"\\2', line)
}
return(line)
Expand Down

0 comments on commit d8d4ee9

Please sign in to comment.