Skip to content

Commit

Permalink
Merge pull request #28 from USEPA/27-add-functions-to-wrap-pubchem-li…
Browse files Browse the repository at this point in the history
…nk-to-ghs-classification

27 add functions to wrap pubchem link to ghs classification
  • Loading branch information
rachmaninoffpk authored Sep 12, 2024
2 parents b921302 + b43f402 commit 0f2b811
Show file tree
Hide file tree
Showing 88 changed files with 5,971 additions and 5,488 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Generated by roxygen2: do not edit by hand

S3method(print,ctx_credentials)
export(check_existence_by_dtxsid)
export(check_existence_by_dtxsid_batch)
export(chemical_contains)
export(chemical_contains_batch)
export(chemical_equal)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ information from 400 errors (@kisaacs1, #11).

## New features

* Added `check_existence_by_dtxsid()`, `check_existence_by_dtxsid_batch()`
functions. Updated the `Chemical.Rmd` vignette to include examples of how to use
these functions (#27).

* Added `get_httk_data()`, `get_httk_data_batch()`,
`get_general_exposure_prediction()`, `get_general_exposure_prediction_batch()`,
`get_demographic_exposure_prediction()`,
Expand Down
105 changes: 104 additions & 1 deletion R/chemical-APIs-batch.R
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,109 @@ generate_dtxsid_string <- function(items){
return(dtxsid_string)
}

#' Check existence by DTXSID batch
#'
#' Posts the supplied DTXSIDs in chunks to the `ghslink/to-dtxsid` endpoint and
#' collects the responses. Any DTXSID that is absent from the batch responses
#' is looked up individually with [check_existence_by_dtxsid()] and the results
#' are combined.
#'
#' @param DTXSID The chemical identifier DTXSIDs, as a character vector or a
#'   list of character strings. Duplicates are removed before querying.
#' @param API_key The user-specific API key. If `NULL` or not a character, the
#'   stored key is used when one is available.
#' @param rate_limit Number of seconds to wait between each request. Values
#'   that are not a single non-negative number are replaced by 0 with a
#'   warning.
#' @param Server The root address of the API endpoint
#' @param verbose A logical indicating whether some "progress report" should be
#'   given.
#'
#' @return A data.table of information detailing valid and invalid DTXSIDs,
#'   with columns `dtxsid`, `isSafetyData` and `safetyUrl`. When individual
#'   lookups were needed, rows follow the order of the unique input DTXSIDs; a
#'   DTXSID whose individual lookup also failed yields a row of `NA`s.
#' @export
#'
#' @examplesIf FALSE
#' dtxsids <- c('DTXSID7020182F', 'DTXSID7020182', 'DTXSID0020232F')
#' existence <- check_existence_by_dtxsid_batch(DTXSID = dtxsids)
check_existence_by_dtxsid_batch <- function(DTXSID = NULL,
                                            API_key = NULL,
                                            rate_limit = 0L,
                                            Server = chemical_api_server,
                                            verbose = FALSE){
  if (is.null(API_key) || !is.character(API_key)){
    if (has_ctx_key()) {
      API_key <- ctx_key()
      if (verbose) {
        message('Using stored API key!')
      }
    }
  }

  # Scalar `||` short-circuits, so NULL, NA or vector input falls back to 0
  # instead of raising an unrelated "argument is of length zero" error.
  if (!is.numeric(rate_limit) || length(rate_limit) != 1L ||
      is.na(rate_limit) || rate_limit < 0){
    warning('Setting rate limit to 0 seconds between requests!')
    rate_limit <- 0L
  }

  if (is.null(DTXSID)){
    stop('Please input a list of DTXSIDs!')
  }
  if (!is.character(DTXSID) &&
      !all(vapply(DTXSID, is.character, logical(1)))){
    stop('Please input a character list for DTXSID!')
  }

  # Flatten list input so the request body is a flat JSON array of strings.
  DTXSID <- unique(unlist(DTXSID, use.names = FALSE))

  # Typed, empty template that fixes the column names and order of the result.
  dt <- data.table::data.table(dtxsid = character(),
                               isSafetyData = logical(),
                               safetyUrl = character())

  if (length(DTXSID) == 0L){
    return(dt)
  }

  indices <- generate_ranges(length(DTXSID))

  for (i in seq_along(indices)){
    if (verbose) {
      print(paste('The current index is i =', i, 'out of', length(indices)))
    }

    chunk <- DTXSID[indices[[i]]]

    # auto_unbox = FALSE keeps the body a JSON array even for a single DTXSID.
    response <- httr::POST(url = paste0(Server, '/ghslink/to-dtxsid/'),
                           httr::add_headers(.headers = c(
                             'Accept' = 'application/json',
                             'Content-Type' = 'application/json',
                             'x-api-key' = API_key
                           )),
                           body = jsonlite::toJSON(chunk, auto_unbox = FALSE))

    if (response$status_code == 401){
      # Fail fast with the same error the per-DTXSID fallback would raise.
      stop('Please input an API_key!')
    }

    if (response$status_code == 200){
      if (length(response$content) > 0){
        res_content <- jsonlite::fromJSON(httr::content(response,
                                                        as = 'text',
                                                        encoding = "UTF-8"))
        if (length(res_content$safetyUrl) > 0){
          null_indices <- which(vapply(res_content$safetyUrl,
                                       is.null,
                                       logical(1)))
          if (length(null_indices) > 0){
            res_content$safetyUrl[null_indices] <- NA_character_
          }
          dt <- suppressWarnings(data.table::rbindlist(
            list(dt, data.table::rbindlist(list(res_content))),
            fill = TRUE))
        }
      }
    } else if (verbose) {
      print(paste0('The request was unsuccessful, returning an error of ',
                   response$status_code, '!'))
    }

    Sys.sleep(rate_limit)
  }

  # Fix for bug in endpoint. DTXSIDs that are not valid do not have information
  # returned. To overcome this, the single search on the missing DTXSIDs is
  # executed and combined with the valid responses.
  unresolved <- setdiff(DTXSID, dt$dtxsid)
  if (length(unresolved) == 0L){
    return(dt)
  }

  # Forward Server and verbose so the fallback hits the same API root, and
  # keep honoring the requested pause between requests.
  missing_info <- data.table::rbindlist(lapply(unresolved, function(id){
    single <- check_existence_by_dtxsid(DTXSID = id,
                                        API_key = API_key,
                                        Server = Server,
                                        verbose = verbose)
    Sys.sleep(rate_limit)
    single
  }), fill = TRUE)

  # fill = TRUE binds by column name, so a differing column order or a missing
  # column in the single-lookup results cannot misalign or abort the bind.
  final <- data.table::rbindlist(list(dt, missing_info), fill = TRUE)
  final[match(DTXSID, final$dtxsid), ]
}

get_smiles_batch <- function(names = NULL,
API_key = NULL,
rate_limit = 0L,
Expand Down Expand Up @@ -1098,7 +1201,7 @@ chemical_equal_batch <- function(word_list = NULL,




return(return_list)
}

Expand Down
62 changes: 62 additions & 0 deletions R/chemical-APIs.R
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,68 @@ create_data.table_chemical_details <- function(index = -1){
return(data)
}

#' Check existence by DTXSID
#'
#' Sends a GET request to the `ghslink/to-dtxsid` endpoint for one DTXSID and
#' returns the parsed response as a one-row data.table.
#'
#' @param DTXSID The chemical identifier DTXSID, a single character string.
#' @param API_key The user-specific API key. If `NULL`, the stored key is used
#'   when one is available.
#' @param Server The root address for the API endpoint
#' @param verbose A logical indicating whether some "progress report" should be
#'   given.
#'
#' @return A data.table with information on whether the input DTXSID is valid,
#'   and where to find more information on the chemical when the DTXSID is
#'   valid. `NULL` is returned when the request does not succeed with status
#'   200 (a 401 status raises an error instead).
#' @export
#'
#' @examplesIf FALSE
#' # DTXSID for bpa
#' bpa <- check_existence_by_dtxsid('DTXSID7020182')
#' # False DTXSID
#' false_res <- check_existence_by_dtxsid('DTXSID7020182f')
check_existence_by_dtxsid <- function(DTXSID = NULL,
                                      API_key = NULL,
                                      Server = chemical_api_server,
                                      verbose = FALSE){
  if (is.null(DTXSID) || !is.character(DTXSID)){
    stop('Please input a DTXSID!')
  }
  # The DTXSID is pasted into the request URL, so exactly one value is needed.
  if (length(DTXSID) != 1L){
    stop('Please input a single DTXSID!')
  }

  if (is.null(API_key)){
    if (has_ctx_key()) {
      API_key <- ctx_key()
      # Only report when asked, matching the batch variant of this function.
      if (verbose) {
        message('Using stored API key!')
      }
    }
  }

  response <- httr::GET(url = paste0(Server, '/ghslink/to-dtxsid/', DTXSID),
                        httr::add_headers(.headers = c(
                          'Content-Type' = 'application/json',
                          'x-api-key' = API_key
                        )))

  if (response$status_code == 401){
    stop('Please input an API_key!')
  }

  if (response$status_code != 200){
    if (verbose){
      print(paste0('The request was unsuccessful, returning an error of ',
                   response$status_code, '!'))
    }
    return(NULL)
  }

  res_content <- jsonlite::fromJSON(httr::content(response,
                                                  as = 'text',
                                                  encoding = "UTF-8"))
  # A missing safetyUrl would drop the column; store a typed NA so the column
  # is always present in the returned row.
  if (is.null(res_content$safetyUrl)){
    res_content$safetyUrl <- NA_character_
  }
  data.table::rbindlist(list(res_content))
}

get_chemical_details_by_listname <- function(listname = NULL,
API_key = NULL,
Server = chemical_api_server,
Expand Down
38 changes: 38 additions & 0 deletions man/check_existence_by_dtxsid.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions man/check_existence_by_dtxsid_batch.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"id": "337693",
"cpdataCount": 292,
"inchikey": "IISBACLAFKSPIT-UHFFFAOYSA-N",
"wikipediaArticle": "Bisphenol A",
"dtxsid": "DTXSID7020182",
Expand Down Expand Up @@ -34,6 +35,5 @@
"irisLink": "356",
"pprtvLink": null,
"descriptorStringTsv": "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\
t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0",
"isMarkush": false,
"cpdataCount": 292
"isMarkush": false
}
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
structure(list(url = "https://api-ccte.epa.gov/chemical/detail/search/by-dtxsid/?projection=chemicaldetailstandard",
status_code = 401L, headers = structure(list(`content-type` = "application/json;charset=ISO-8859-1",
`content-length` = "164", connection = "keep-alive",
date = "Wed, 22 May 2024 19:50:06 GMT", `strict-transport-security` = "max-age=31536000",
`x-content-type-options` = "nosniff", `x-vcap-request-id` = "52ef7dea-f1b8-488f-6322-2e510c403b56",
date = "Thu, 29 Aug 2024 19:35:38 GMT", `strict-transport-security` = "max-age=31536000",
`x-content-type-options` = "nosniff", `x-vcap-request-id` = "ff9e668f-1084-4aa4-5a68-da205e1fd0c6",
`x-xss-protection` = "1; mode=block", `x-frame-options` = "DENY",
`x-cache` = "Error from cloudfront", via = "1.1 052215bfd8d35ecb703b208e875bd350.cloudfront.net (CloudFront)",
`x-amz-cf-pop` = "ATL59-P8", `x-amz-cf-id` = "r9PRCLIOMQ-IjFNVD1DH3Q1a-9TjGXT6Dl5bYiHGTaorjaQQPtNi1Q=="), class = c("insensitive",
`x-cache` = "Error from cloudfront", via = "1.1 b5e757a7da6f6fe6261f56a8a9646880.cloudfront.net (CloudFront)",
`x-amz-cf-pop` = "IAD89-C1", `x-amz-cf-id` = "kDYQM2MNV0A6RiiLL5dBEuolojD4vRlgYLjbyQ0a5L29xc_VsoOmCA=="), class = c("insensitive",
"list")), all_headers = list(list(status = 401L, version = "HTTP/1.1",
headers = structure(list(`content-type` = "application/json;charset=ISO-8859-1",
`content-length` = "164", connection = "keep-alive",
date = "Wed, 22 May 2024 19:50:06 GMT", `strict-transport-security` = "max-age=31536000",
`x-content-type-options` = "nosniff", `x-vcap-request-id` = "52ef7dea-f1b8-488f-6322-2e510c403b56",
date = "Thu, 29 Aug 2024 19:35:38 GMT", `strict-transport-security` = "max-age=31536000",
`x-content-type-options` = "nosniff", `x-vcap-request-id` = "ff9e668f-1084-4aa4-5a68-da205e1fd0c6",
`x-xss-protection` = "1; mode=block", `x-frame-options` = "DENY",
`x-cache` = "Error from cloudfront", via = "1.1 052215bfd8d35ecb703b208e875bd350.cloudfront.net (CloudFront)",
`x-amz-cf-pop` = "ATL59-P8", `x-amz-cf-id` = "r9PRCLIOMQ-IjFNVD1DH3Q1a-9TjGXT6Dl5bYiHGTaorjaQQPtNi1Q=="), class = c("insensitive",
`x-cache` = "Error from cloudfront", via = "1.1 b5e757a7da6f6fe6261f56a8a9646880.cloudfront.net (CloudFront)",
`x-amz-cf-pop` = "IAD89-C1", `x-amz-cf-id` = "kDYQM2MNV0A6RiiLL5dBEuolojD4vRlgYLjbyQ0a5L29xc_VsoOmCA=="), class = c("insensitive",
"list")))), cookies = structure(list(domain = logical(0),
flag = logical(0), path = logical(0), secure = logical(0),
expiration = structure(numeric(0), class = c("POSIXct",
"POSIXt")), name = logical(0), value = logical(0)), row.names = integer(0), class = "data.frame"),
content = charToRaw("{\"title\":\"API Header Not Found\",\"detail\":\"Every API call should pass assigned API key through custom http header or query parameter. Request is missing x-api-key.\"}"),
date = structure(1716407406, class = c("POSIXct", "POSIXt"
), tzone = "GMT"), times = c(redirect = 0, namelookup = 3.9e-05,
connect = 0, pretransfer = 0.00015, starttransfer = 0.095333,
total = 0.095361)), class = "response")
date = structure(1724960138, class = c("POSIXct", "POSIXt"
), tzone = "GMT"), times = c(redirect = 0, namelookup = 4e-05,
connect = 0, pretransfer = 0.000123, starttransfer = 0.250141,
total = 0.250168)), class = "response")
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"id": "337693",
"inchikey": "IISBACLAFKSPIT-UHFFFAOYSA-N",
"wikipediaArticle": "Bisphenol A",
"cpdataCount": 292,
"dtxsid": "DTXSID7020182",
"dtxcid": "DTXCID30182",
"casrn": "80-05-7",
Expand Down Expand Up @@ -35,7 +36,6 @@
"irisLink": "356",
"pprtvLink": null,
"descriptorStringTsv": "0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\
t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0",
"isMarkush": false,
"cpdataCount": 292
"isMarkush": false
}
]
Loading

0 comments on commit 0f2b811

Please sign in to comment.