Skip to content

Latest commit

 

History

History
317 lines (229 loc) · 9.14 KB

README.md

File metadata and controls

317 lines (229 loc) · 9.14 KB
---
output:
  html_document:
    highlight: tango
    keep_md: true
    theme: readable
---

CTSgetR



CTSgetR provides a consistent interface for translating chemical names and over 200 database identifiers, including InChIKey, HMDB, KEGG and PubChem. Translation of chemical names is hard. Use CTSgetR to robustly translate chemical names to other identifiers through 1) conversion to InChIKey, 2) biological or popularity scoring and 3) translation to over 200 biological database identifiers. CTSgetR uses a sqlite database to cache and speed up all of your routine translations.

This package supports metabolite identifier translation:

using R

Installation

# install_github() is not a base R function; call it through the devtools
# namespace so the snippet works without attaching a package first
# (run install.packages("devtools") beforehand if devtools is missing).
devtools::install_github("dgrapov/CTSgetR")

Make sure CTS API is available

library(CTSgetR)

# Request the CTS services URL and stop with an error unless the HTTP status
# category is "Success".
if (
  http_status(GET("https://cts.fiehnlab.ucdavis.edu/services"))$category != "Success"
) {
  stop("Oops looks like https://cts.fiehnlab.ucdavis.edu/services is down!")
}

View some of the possible translation options between > 200 databases

# Flatten the result of valid_from() into a single vector and preview its
# first ten entries.
trans <- unlist(valid_from())
head(trans, n = 10)
##  [1] "BioCyc"                    "CAS"                      
##  [3] "ChEBI"                     "Chemical Name"            
##  [5] "Human Metabolome Database" "InChIKey"                 
##  [7] "KEGG"                      "LMSD"                     
##  [9] "LipidMAPS"                 "PubChem CID"

Find a database of interest

# Case-insensitive search of the available names for a pattern of interest.
want <- "CID"
grep(want, trans, ignore.case = TRUE, value = TRUE)
## [1] "PubChem CID"

Initialize a local database to speed up routine queries

# File name of the local sqlite database used as a cache
db_name <- "ctsgetr.sqlite"

# Initialize the database, then call db_stats()
init_CTSgetR_db(db_name)
db_stats()

Translation examples

Chemical Name to InChIKey

# Local cache database
db_name <- "ctsgetr.sqlite"

# Translate two chemical names into InChIKeys
id <- c("alanine", "lactic acid")
from <- "Chemical Name"
to <- "InChIKey"

CTSgetR(id, from, to, db_name = db_name)
##            id                    InChIKey
## 1     alanine QNAYBMKLOCPYGJ-REOHCLBHSA-N
## 2 lactic acid JVTAAEKCZFNVCJ-UHFFFAOYSA-N

One identifier to many

The example below shows the alternative data.frame input format for more complex queries.
# Translate each chemical name into three target identifier types at once
id <- c("alanine", "lactic acid")
from <- "Chemical Name"
to <- c("PubChem CID", "KEGG", "Human Metabolome Database")

CTSgetR(id, from, to, db_name = db_name)
##            id Human Metabolome Database   KEGG PubChem CID
## 1     alanine               HMDB0000161 C00041        5950
## 2 lactic acid               HMDB0144295 C01432    19789253

Many identifiers to one

Build up complex queries by combining data frames of `id`, `from` and `to` values.
 # From many to many: one row per (id, from, to) query, every column a factor.
 # The first nine rows cross three chemical names with three target databases;
 # the last two rows translate one HMDB identifier to KEGG and PubChem CID.
args <- data.frame(
  id = factor(
    c(
      rep(c("alanine", "foo", "lactic acid"), times = 3),
      "HMDB0000161", "HMDB0000161"
    ),
    levels = c("alanine", "foo", "lactic acid", "HMDB0000161")
  ),
  from = factor(
    rep(c("Chemical Name", "Human Metabolome Database"), times = c(9, 2)),
    levels = c("Chemical Name", "Human Metabolome Database")
  ),
  to = factor(
    c(
      rep(c("PubChem CID", "KEGG", "Human Metabolome Database"), each = 3),
      "KEGG", "PubChem CID"
    ),
    levels = c("PubChem CID", "KEGG", "Human Metabolome Database")
  )
)
args
##             id                      from                        to
## 1      alanine             Chemical Name               PubChem CID
## 2          foo             Chemical Name               PubChem CID
## 3  lactic acid             Chemical Name               PubChem CID
## 4      alanine             Chemical Name                      KEGG
## 5          foo             Chemical Name                      KEGG
## 6  lactic acid             Chemical Name                      KEGG
## 7      alanine             Chemical Name Human Metabolome Database
## 8          foo             Chemical Name Human Metabolome Database
## 9  lactic acid             Chemical Name Human Metabolome Database
## 10 HMDB0000161 Human Metabolome Database                      KEGG
## 11 HMDB0000161 Human Metabolome Database               PubChem CID
# Group the query rows by their `from` value, run one CTSgetR() call per
# group, and stack the per-group results into a single data frame.
args %>%
  split(., .$from) %>%
  map(function(query) {
    CTSgetR(query$id, query$from, query$to, db_name = db_name)
  }) %>%
  bind_rows()
##            id Human Metabolome Database   KEGG PubChem CID
## 1     alanine               HMDB0000161 C00041        5950
## 2         foo                      <NA>   <NA>        <NA>
## 3 lactic acid               HMDB0144295 C01432    19789253
## 4 HMDB0000161                      <NA> C00041        5950

Deploy CTSgetR as a dockerized API


The following docker image and docker-compose commands can be used to build and run the CTSgetR package as an opencpu based API.

The CTSgetR image contains both an OpenCPU server and an RStudio server.

Build docker image

build

# Export the RStudio server password before building; replace `mypassword`
# with your own value.
export rstudio_pass=mypassword # rstudio server password for user opencpu
# Build from docker-compose.yml
docker-compose -f docker-compose.yml build --force-rm

Launch API

# Mount a local directory to persist the internal sqlite DB between updates.
# NOTE: the `<local path ...>` text below is a placeholder, not shell syntax;
# replace it (angle brackets included) with a real directory such as /mypath.
export ctsgetr_db_mount=<local path to save database e.g. /mypath>
# Start the services from docker-compose.yml in detached mode (-d)
docker-compose -f docker-compose.yml up -d

Test API endpoints

bash

# Request the heartbeat function's URL on the locally running API
curl http://localhost/ocpu/library/CTSgetR/R/heartbeat

R

heartbeat
library(ocpuclient)

# Root URL of the locally running opencpu server
base_url <- "http://localhost/ocpu/"

# Build the heartbeat URL and call it with post_ocpu()
endpoint <- "library/CTSgetR/R/heartbeat"
url <- paste0(base_url, endpoint)
post_ocpu(url = url)
translation
# Translate through the API: build the URL of the CTSgetR function endpoint
# (relies on `base_url` and `db_name` already being defined in the session).
endpoint <- "library/CTSgetR/R/CTSgetR"
url <- paste0(base_url, endpoint)

# KEGG identifiers to convert into PubChem CIDs
id <- c("C15973", "C00026")
from <- "KEGG"
to <- "PubChem CID"

# Named arguments sent as the request body
body <- list(id = id, from = from, to = to, db_name = db_name)

post_ocpu(url = url, body = body)

Launch shiny UI using asynchronous opencpu API

The following example shows how to use a shiny module combined with the future and promises R packages to connect to an opencpu API using async calls.

library(shiny)
library(tippy)
library(CTSgetR) # local calls
library(ocpuclient) # CTSgetR opencpu API calls

Specify local database or API options

#one of local
Sys.setenv('ctsgetr_DB'='inst/ctsgetr.sqlite') #see section `in R` showing how to initialize a local databse
#or API
Sys.setenv('ctsgetr_DB'='/ctsgetr/inst/ctsgetr.sqlite') # in API docker for mount
Sys.setenv('CTSgetR_API'='http://localhost/ocpu/library/CTSgetR/R/CTSgetR') # url of API endpoint

User input translations

    library(promises)
    library(future)
    plan(multisession)

    # UI: the CTSgetR module inputs in a left-hand sidebar, with the printed
    # translation shown in the main panel.
    ui <- fluidPage(
      sidebarLayout(
        sidebarPanel = sidebarPanel(tagList(mod_CTSgetR_ui("translate"))),
        mainPanel = mainPanel(verbatimTextOutput("main_out")),
        position = "left"
      )
    )

    # Server: wire up the module and print its result through the promise
    # pipe, i.e. once the value is available.
    server <- function(input, output, session) {
      translated <- mod_CTSgetR_server("translate")

      output$main_out <- renderPrint({
        translated() %...>% print()
      })
    }

    shinyApp(ui, server)

Connect to other shiny components

library(promises)
library(future)
plan(multisession)

# Static example input; make `example` a reactive returning a data frame
# to update it dynamically.
example <- data.frame(chemical_name = c("alanine", "Pyruvic acid"))

# UI: the CTSgetR module inputs in a left-hand sidebar, with the printed
# translation shown in the main panel.
ui <- fluidPage(
  sidebarLayout(
    sidebarPanel = sidebarPanel(tagList(mod_CTSgetR_ui("translate"))),
    mainPanel = mainPanel(verbatimTextOutput("main_out")),
    position = "left"
  )
)

# Server: hand the example data to the module and print its result through
# the promise pipe, i.e. once the value is available.
server <- function(input, output, session) {
  translated <- mod_CTSgetR_server("translate", data = example)

  output$main_out <- renderPrint({
    translated() %...>% print()
  })
}

shinyApp(ui, server)