forked from seandavi/awesome-single-cell
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added a tool to create computational formats of data
I added a tool that reads from Luke's Google Sheet and converts the results to a tidy CSV format as well as a JSON format. This script can serve as a place to build further computed fields such as badges, etc.
- Loading branch information
Showing
3 changed files
with
83 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
.httr-oauth |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
## Utilities | ||
|
||
### Create Accessory files | ||
|
||
The file, `create_files.R`, is an R script meant to be run from the command line. It accesses the Google Sheet here: | ||
|
||
https://docs.google.com/spreadsheets/d/1n3hXzzhrHZgClLD8P3cyIrK_6YgdjtdGlLswNAuoKSI/edit?usp=sharing | ||
|
||
It then creates a tidy CSV file of software (`single-cell-software_tidy.csv`) after adding a few automatically calculated columns. It also writes a JSON file of the same data (`single-cell-software.json`), but with one record per software package; in that file the categories are collapsed to a JSON array. To run: | ||
|
||
``` | ||
create_files.R /path/to/git/repo | ||
# or | ||
create_files.R /path/to/directory/for/accessory_files | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
#!/usr/bin/env Rscript
# Command-line front end: the help text below is also the docopt interface
# specification, so it defines the single positional argument OUTPUT_DIR.
library(docopt)
# The file names listed here must stay in sync with the names used when the
# files are written (note the underscore in "_tidy.csv").
doc <- "Usage: create_files OUTPUT_DIR
-h --help show this
This utility script converts the google spreadsheet here:
https://docs.google.com/spreadsheets/d/1n3hXzzhrHZgClLD8P3cyIrK_6YgdjtdGlLswNAuoKSI/edit?usp=sharing
to a set of files including:
- OUTPUT_DIR/single-cell-software_tidy.csv
- OUTPUT_DIR/single-cell-software.json
"
opts <- docopt(doc)
# Echo the parsed options so the resolved OUTPUT_DIR shows up in the log.
print(opts)
|
||
library(googlesheets) | ||
library(readr) | ||
library(jsonlite) | ||
library(dplyr) | ||
library(tidyr) | ||
library(lubridate) | ||
# Key of the Google Sheet to convert; it is the ID segment of the sheet URL.
gskey <- "1n3hXzzhrHZgClLD8P3cyIrK_6YgdjtdGlLswNAuoKSI"
# Authorize googlesheets up front, before any sheet is read.
gs_auth()
|
||
|
||
#' Create a tidy software table from the Google Sheet
#'
#' Reads the sheet identified by `gskey`, converts the date columns, derives
#' a few helper columns and gathers every remaining column into long form,
#' keeping only the rows whose gathered value is `TRUE`. The result therefore
#' has one row per software/category pair.
#'
#' Derived columns:
#' * `Preprint`: `TRUE` where `Pub Date` is the literal "PREPRINT", else `NA`.
#' * `DOI_url`: resolver URL built from `DOI`; `NA` where `DOI` is missing.
#' * `Github`, `Bioconductor`, `CRAN`: whether `Code` mentions the respective
#'   host. All three matches ignore case, so canonical links such as
#'   `https://CRAN.R-project.org/package=...` are recognised.
#'
#' @param gskey Google Sheets key (the ID segment of the sheet URL).
#' @return A data frame holding the descriptive columns, the derived columns
#'   and a `category` column naming a column that was `TRUE` for that row.
#' @export
get_tidy_sw_list <- function(gskey) {
  swsheet <- gs_key(gskey) %>%
    gs_read() %>%
    # Record the preprint marker before `Pub Date` is converted to a date;
    # the "PREPRINT" text itself is presumably not parseable as a date.
    mutate(Preprint = (`Pub Date` == "PREPRINT")) %>%
    mutate(`Pub Date` = as_date(`Pub Date`)) %>%
    # Only the positive flag is kept; FALSE and NA both end up as NA.
    mutate(Preprint = ifelse(Preprint == TRUE, TRUE, NA)) %>%
    mutate(Added = as_date(Added)) %>%
    mutate(Updated = as_date(Updated)) %>%
    mutate(DOI_url = ifelse(is.na(DOI), NA, paste0("http://dx.doi.org/", DOI)))
  swsheet %>%
    # Every column not excluded here is treated as a category flag column.
    gather(
      key = "category", value = "val",
      -Description, -Name, -Platform, -DOI, -`Pub Date`, -Updated, -Added,
      -Preprint, -Code, -DOI_url, -License
    ) %>%
    mutate(Github = grepl("github", Code, ignore.case = TRUE)) %>%
    mutate(Bioconductor = grepl("bioconductor", Code, ignore.case = TRUE)) %>%
    mutate(CRAN = grepl("cran\\.r-project", Code, ignore.case = TRUE)) %>%
    filter(val == TRUE) %>%
    select(-val)
}
|
||
# Collapse the tidy table so that categories live in a list column.
#
# tidysw: data frame with at least `Name` and `category` columns.
# Returns the distinct rows of `tidysw` without `category`, plus a
# `categories` list column holding, for each row, all category values that
# share the row's `Name`.
tidysw_to_list_df <- function(tidysw) {
  categories_by_name <- split(tidysw$category, f = tidysw$Name)
  per_package <- unique(select(tidysw, -category))
  # Look categories up by name so they line up with the de-duplicated rows.
  per_package[["categories"]] <- categories_by_name[per_package$Name]
  per_package
}
|
||
|
||
#' Write out the csv and json files
#'
#' Fetches the tidy software table for the script-level `gskey` and writes it
#' twice into `destdir`: as `single-cell-software_tidy.csv` (one row per
#' software/category pair) and as `single-cell-software.json` (the output of
#' `tidysw_to_list_df()`, pretty-printed).
#'
#' @param destdir Directory to write into; created recursively when missing.
#' @return Called for its side effects; returns `NULL` invisibly.
#' @export
write_files <- function(destdir) {
  # Pointing the script at an existing directory is the normal case, so an
  # already-present directory must not produce a warning.
  dir.create(destdir, recursive = TRUE, showWarnings = FALSE)
  if (!dir.exists(destdir)) {
    stop("Could not create output directory: ", destdir, call. = FALSE)
  }
  swsheet <- get_tidy_sw_list(gskey)
  # The destination is passed positionally because readr renamed this
  # argument from `path` to `file`; the positional form works with both.
  write_csv(swsheet, file.path(destdir, "single-cell-software_tidy.csv"))
  writeLines(
    toJSON(tidysw_to_list_df(swsheet), pretty = TRUE),
    file.path(destdir, "single-cell-software.json")
  )
}
|
||
# Script entry point: write both files into the directory given on the
# command line.
write_files(opts[["OUTPUT_DIR"]])