diff --git a/.Rbuildignore b/.Rbuildignore
index c06f5969..741ab6e7 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -11,4 +11,5 @@
^\.github$
^vignettes/articles$
^CRAN-SUBMISSION$
-
+^CONTRIBUTING\.md$
+^revdep$
diff --git a/.github/workflows/rhub.yaml b/.github/workflows/rhub.yaml
new file mode 100644
index 00000000..74ec7b05
--- /dev/null
+++ b/.github/workflows/rhub.yaml
@@ -0,0 +1,95 @@
+# R-hub's generic GitHub Actions workflow file. Its canonical location is at
+# https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml
+# You can update this file to a newer version using the rhub2 package:
+#
+# rhub::rhub_setup()
+#
+# It is unlikely that you need to modify this file manually.
+
+name: R-hub
+run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}"
+
+on:
+ workflow_dispatch:
+ inputs:
+ config:
+ description: 'A comma separated list of R-hub platforms to use.'
+ type: string
+ default: 'linux,windows,macos'
+ name:
+ description: 'Run name. You can leave this empty now.'
+ type: string
+ id:
+ description: 'Unique ID. You can leave this empty now.'
+ type: string
+
+jobs:
+
+ setup:
+ runs-on: ubuntu-latest
+ outputs:
+ containers: ${{ steps.rhub-setup.outputs.containers }}
+ platforms: ${{ steps.rhub-setup.outputs.platforms }}
+
+ steps:
+ # NO NEED TO CHECKOUT HERE
+ - uses: r-hub/actions/setup@v1
+ with:
+ config: ${{ github.event.inputs.config }}
+ id: rhub-setup
+
+ linux-containers:
+ needs: setup
+ if: ${{ needs.setup.outputs.containers != '[]' }}
+ runs-on: ubuntu-latest
+ name: ${{ matrix.config.label }}
+ strategy:
+ fail-fast: false
+ matrix:
+ config: ${{ fromJson(needs.setup.outputs.containers) }}
+ container:
+ image: ${{ matrix.config.container }}
+
+ steps:
+ - uses: r-hub/actions/checkout@v1
+ - uses: r-hub/actions/platform-info@v1
+ with:
+ token: ${{ secrets.RHUB_TOKEN }}
+ job-config: ${{ matrix.config.job-config }}
+ - uses: r-hub/actions/setup-deps@v1
+ with:
+ token: ${{ secrets.RHUB_TOKEN }}
+ job-config: ${{ matrix.config.job-config }}
+ - uses: r-hub/actions/run-check@v1
+ with:
+ token: ${{ secrets.RHUB_TOKEN }}
+ job-config: ${{ matrix.config.job-config }}
+
+ other-platforms:
+ needs: setup
+ if: ${{ needs.setup.outputs.platforms != '[]' }}
+ runs-on: ${{ matrix.config.os }}
+ name: ${{ matrix.config.label }}
+ strategy:
+ fail-fast: false
+ matrix:
+ config: ${{ fromJson(needs.setup.outputs.platforms) }}
+
+ steps:
+ - uses: r-hub/actions/checkout@v1
+ - uses: r-hub/actions/setup-r@v1
+ with:
+ job-config: ${{ matrix.config.job-config }}
+ token: ${{ secrets.RHUB_TOKEN }}
+ - uses: r-hub/actions/platform-info@v1
+ with:
+ token: ${{ secrets.RHUB_TOKEN }}
+ job-config: ${{ matrix.config.job-config }}
+ - uses: r-hub/actions/setup-deps@v1
+ with:
+ job-config: ${{ matrix.config.job-config }}
+ token: ${{ secrets.RHUB_TOKEN }}
+ - uses: r-hub/actions/run-check@v1
+ with:
+ job-config: ${{ matrix.config.job-config }}
+ token: ${{ secrets.RHUB_TOKEN }}
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..8a60bfcf
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,40 @@
+---
+title: "Contributing"
+---
+
+
+Filing issues
+-------------
+
+Please read these points carefully and follow them while filing issues.
+
+- **One issue for one purpose**. Don't add more than one *bug*, *feature request*, or *documentation request* on to the same issue. Take the time to read through the current issues to ensure your issue is not already listed.
+- If you've found a *bug*, thank you for reporting! Please include a reproducible example of your bug in the issue.
+- If you need *support* or have a general *question*, please consider asking the question on [StackOverflow](https://stackoverflow.com).
+- For the project contributors, please label new issues using the following rules:
+ - *bugs* should be labeled "bug"
+ - *feature requests* or *suggestions* should be labeled "enhancement"
+ - *questions* or *requests for support* should be labeled "question"
+
+Pull Requests
+-------------
+
+Please file an issue before creating PRs so that it can be discussed first *before* you invest time implementing it.
+
+1. Please create all pull requests (PR) against the `dev` branch.
+2. Create **one PR per feature/bug fix**. Each PR should be associated with an Issue.
+3. Create a branch for that feature/bug fix, named 'issue-N' where N is the Issue number, and use that as a base for your pull requests. Pull requests directly against your version of `dev` will not be accepted.
+4. Please squash temporary stage commits together before issuing a PR.
+5. All commit messages should have two components: (1) a header on the first line beginning with "issue-N:" and containing no more than 50 characters, and (2) a body with 1 empty line after the header then at least a sentence or two in the commit body detailing all changes and justifications. Lines in the commit body should be wrapped to no more than 72 characters per line, and can contain multiple paragraphs.[1](#myfootnote1)
+6. In your pull request's description, please state clearly what your PR does, i.e., what FR or bug your PR addresses, along with the issue number. For example, "Closes #13: Added CRAN version and monthly download badges."
+7. Please build and test the package using `R CMD check --as-cran` against your branch source package archive `.tar.gz` file. You may want to add `--no-manual`, `--no-build-vignettes` or `--ignore-vignettes` (R 3.3.0+) options to reduce dependencies required to perform check. PRs that fail `check` cannot be merged.
+8. The NEWS file also has to be updated while fixing or implementing an issue. It should mention the issue number and briefly describe the issue being closed. Also add a "Thanks to @your_name for the PR".
+
+**References:** If you are not sure how to issue a PR, but would like to contribute, these links should help get you started:
+
+1. **[How to Github: Fork, Branch, Track, Squash and Pull request](https://gun.io/blog/how-to-github-fork-branch-and-pull-request/)**.
+2. **[Squashing Github pull requests into a single commit](http://eli.thegreenplace.net/2014/02/19/squashing-github-pull-requests-into-a-single-commit)**.
+
+*This guide was modified from the contributing guide for the [data.table](https://github.com/Rdatatable/data.table) repository*
+
+1: To make it easier to count the characters per line you can edit your $HOME/.vimrc ($HOME/_vimrc on Windows) to include ":set ruler" which will display the line and position numbers in the bottom right corner of the terminal when editing the commit messages.
diff --git a/DESCRIPTION b/DESCRIPTION
index 83180506..018d9849 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Type: Package
Package: ctxR
Title: Utilities for Interacting with the 'CTX' APIs
-Version: 1.0.0.9000
+Version: 1.1.0
Authors@R: c(
person("Paul", "Kruse", email = "kruse.paul@epa.gov",
role = c("aut", "cre"),
@@ -19,11 +19,11 @@ Authors@R: c(
)
Description: Access chemical, hazard, bioactivity, and exposure data from the
Computational Toxicology and Exposure ('CTX') APIs
- . 'ctxR' was developed to streamline the
- process of accessing the information available through the 'CTX' APIs
- without requiring prior knowledge of how to use APIs. Most data is also
- available on the CompTox Chemical Dashboard ('CCD')
- and other resources found at the
+ .
+ 'ctxR' was developed to streamline the process of accessing the information
+ available through the 'CTX' APIs without requiring prior knowledge of how to
+ use APIs. Most data is also available on the CompTox Chemical Dashboard
+ ('CCD') and other resources found at the
EPA Computational Toxicology and Exposure Online Resources
.
License: GPL (>= 3)
diff --git a/NEWS.md b/NEWS.md
index 3b2bb113..8a5e5cb7 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,26 +1,50 @@
-# ctxR (development version)
+# ctxR 1.1.0
## Minor improvements and fixes
+* Updated exposition, corrected typos, and added references to the vignettes (#37).
+
+* Fixed broken link to `CONTRIBUTING.md` in README (@c1au6i0, #37)
+
+* Updated `get_chemical_synonym()`, `get_chemical_synonym_batch()` to handle returned data more efficiently (#30).
+
+* Updated documentation of internal helper functions `create_data.table_chemical_details()`, `prepare_word()` (#24).
+
+* Fixed `get_bioactivity_details()` to handle case when `mc6Param` field of returned data is NULL (#18).
+
+* Fixed `get_bioactivity_details()` to handle `m4id` parameter values that do not exist (#16).
+
* Adjusted `chemical_contains()`, `chemical_equal()`, and
`chemical_starts_with()` to handle http 400 errors and returned information from
those requests. Adjusted the `chemical_contains_batch()`,
-`chemical_equals_batch()`, and `chemical_starts_with_batch()` to return named
-lists of valid and invalid search results, with invalid results containing
-information from 400 errors (@kisaacs1, #11).
+`chemical_equals_batch()`, and `chemical_starts_with_batch()` to return named lists of valid and invalid search results, with invalid results containing information from 400 errors (@kisaacs1, #12).
+
+* Added examples in `Chemical.Rmd` vignette for `get_chemical_details_batch()` (#8).
## New features
-* Added `check_existence_by_dtxsid()`, `check_existence_by_dtxsid_batch()`
-functions. Updated the `Chemical.Rmd` vignette to include examples of how to use
-these functions (#27).
+* Added warning for missing `API_key` parameter and missing stored API key to all functions that wrap API endpoints (#35).
+
+* Added error handling for invalid API key inputs to all functions that wrap an API endpoint (#33).
+
+* Added functions `get_all_list_types()`, `get_chemicals_in_list_start()`, `get_chemicals_in_list_start_batch()`, `get_chemicals_in_list_exact()`, `get_chemicals_in_list_exact_batch()`, `get_chemicals_in_lists_contain()`, `get_chemicals_in_lists_contain_batch()`. Added `gsid` parameter to `get_chemical_image()`. Updated `Chemical.Rmd` vignette to include examples for new chemical functions and new features (#30).
+
+* Added `check_existence_by_dtxsid()`, `check_existence_by_dtxsid_batch()` functions. Updated the `Chemical.Rmd` vignette to include examples of how to use these functions and `ctxR` hex logo location (#28).
+
+* Added parameter `limit` with default value 200 to `generate_ranges()` function. Fixed request limit in `chemical_equal_batch()` function (@seanthimons, #26).
+
+* Added pkgdown website for development version of package (#22).
+
+* Added monthly download and CRAN version badges to `README.md` and `README.Rmd` files (#14).
* Added `get_httk_data()`, `get_httk_data_batch()`,
`get_general_exposure_prediction()`, `get_general_exposure_prediction_batch()`,
`get_demographic_exposure_prediction()`,
`get_demographic_exposure_prediction_batch()` functions. Updated the
-`Exposure.Rmd` vignette to include examples of how to use these functions (#6).
+`Exposure.Rmd` vignette to include examples of how to use these functions (#10).
+
+* Added pkgdown site for CRAN version of `ctxR` (#20).
# ctxR 1.0.0
diff --git a/README.Rmd b/README.Rmd
index acbc5120..2c9345c2 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -28,11 +28,11 @@ Package website: [release](https://usepa.github.io/ctxR/) | [dev](https://usepa.
## Welcome to the GitHub repository for the ctxR package
-
+
ctxR was developed to streamline the process of accessing the information available through the [Computational Toxicology and Exposure (CTX) APIs](https://www.epa.gov/comptox-tools/computational-toxicology-and-exposure-apis) without requiring prior knowledge of how to use APIs. Chemical, hazard, bioactivity, and exposure data in available from the CTX APIs. Most data is also available on the [CompTox Chemical Dashboard (CCD)](https://comptox.epa.gov/dashboard/) or within other [Computational Toxicology and Exposure Online Resources](https://www.epa.gov/comptox-tools).
-If you are interested in contributing or want to report a bug, please submit a issue or start a discussion. See [CONTRIBUTING](CONTRIBUTING.md) for more information.
+If you are interested in contributing or want to report a bug, please submit an issue or start a discussion. See [CONTRIBUTING](https://github.com/USEPA/ctxR/blob/dev/CONTRIBUTING.md) for more information.
To install the current development version, run the following command:
diff --git a/README.md b/README.md
index 8a78b190..41d5b929 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ Downloads](https://cranlogs.r-pkg.org/badges/last-month/ctxR?color=7BAFD4)](http
## Welcome to the GitHub repository for the ctxR package
-
+
ctxR was developed to streamline the process of accessing the
information available through the [Computational Toxicology and Exposure
@@ -32,8 +32,7 @@ is also available on the [CompTox Chemical Dashboard
Resources](https://www.epa.gov/comptox-tools).
If you are interested in contributing or want to report a bug, please
-submit a issue or start a discussion. See
-[CONTRIBUTING](CONTRIBUTING.md) for more information.
+submit an issue or start a discussion. See [CONTRIBUTING](https://github.com/USEPA/ctxR/blob/dev/CONTRIBUTING.md) for more information.
To install the current development version, run the following command:
diff --git a/cran-comments.md b/cran-comments.md
index 079fc53c..87ab6b60 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,50 +1,34 @@
## Test environments
+* macOS 13.3.1, Apple clang version 14.0.0 (clang-1400.0.29.202), GNU Fortran (GCC) 12.2.0, macOS Ventura 13.3.1
+* R Under development (unstable) (2024-11-26 r87383 ucrt), Windows Server 2022 x64 (build 20348)
* local Windows 10 install, R 4.4.0
-* R Under development (unstable) (2024-06-27 r86847 ucrt),
-Windows Server 2022 x64 (build 20348)
-
-
-* Rhub/actions Windows Server 2022 x64 (build 20348)
-* Rhub/actions Ubuntu 22.04.4 LTS, clang-asan, R-devel (2024-06-27 r86847)
-* Rhub/actions macos-13 on GitHub, Apple clang version 14.0.0
-(clang-1400.0.29.202), GNU Fortran (GCC) 12.2.0, macOS Venture 13.6.7
+* Rhub/actions Ubuntu 22.04.5 LTS, clang-asan, R-devel (2024-11-26 r87383)
## rhub CMD check results
Status: OK
-* elapsed time (Windows Server 2022 x64) : 1:50
-* elapsed time (Ubuntu 22.04.4 LTS) : 0:56
-* elapsed time (macos-13 on GitHub) : 1:50
+* elapsed time (Ubuntu 22.04.5 LTS) : 1:38
+
+## mac release check results
+
+0 errors | 0 warnings | 0 notes
## win devel check results
-1 NOTE - CRAN incoming feasibility
-Indicated possibly misspelled words in DESCRIPTION (APIs, CompTox, bioactivity).
-These are all correctly spelled.
+
+* 1 NOTE - CRAN incoming feasibility.
+ Indicated possibly invalid URL. This URL works correctly and is stored in data retrieved from an API request, so we cannot alter it.
## local R CMD check results
-* checking CRAN incoming feasibility ... NOTE
- Maintainer: 'Paul Kruse '
-
- New submission
-
-
-0 errors | 0 warnings | 1 note
-
-* This is a new release. It is a renamed version of the ccdR 1.0.0 package.
-There are a few reasons for renaming this package. ccdR was developed when the
-APIs it wraps were primarily pulling data from the CompTox Chemicals Dashboard
-(CCD) and named to reflect that. More data is now available from the APIs than
-is represented by the CCD. The APIs have been renamed to be the
-Computational Toxicology and Exposure (CTX) APIs, which is a stable name that
-more appropriately represents the data domain and area of research the tools
-and resources related to the data represent. ctxR is much more representative
-than ccdR of the current APIs and data. Additionally, the US EPA is
-coordinating several API clients written in different languages to use the
-consistent package name ctx_, where '_' is used to represent the language in
-which a client is developed (e.g. ctxR for R, ctxPy for Python). Renaming ccdR
-to ctxR reflects this harmonization of CTX API clients across languages.
+0 errors | 0 warnings | 0 notes
+
+## revdepcheck results
+
+We checked 1 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package.
+
+ * We saw 0 new problems
+ * We failed to check 0 packages
diff --git a/revdep/.gitignore b/revdep/.gitignore
new file mode 100644
index 00000000..111ab324
--- /dev/null
+++ b/revdep/.gitignore
@@ -0,0 +1,7 @@
+checks
+library
+checks.noindex
+library.noindex
+cloud.noindex
+data.sqlite
+*.html
diff --git a/revdep/README.md b/revdep/README.md
new file mode 100644
index 00000000..00e64197
--- /dev/null
+++ b/revdep/README.md
@@ -0,0 +1,35 @@
+# Platform
+
+|field |value |
+|:--------|:----------------------------------------|
+|version |R version 4.4.0 (2024-04-24 ucrt) |
+|os |Windows 11 x64 (build 22621) |
+|system |x86_64, mingw32 |
+|ui |RStudio |
+|language |(EN) |
+|collate |English_United States.utf8 |
+|ctype |English_United States.utf8 |
+|tz |America/New_York |
+|date |2024-11-27 |
+|rstudio |2024.04.0+735 Chocolate Cosmos (desktop) |
+|pandoc |NA |
+
+# Dependencies
+
+|package |old |new |Δ |
+|:----------|:-----|:----------|:--|
+|ctxR |1.0.0 |1.0.0.9000 |* |
+|askpass |NA |1.2.1 |* |
+|cli |NA |3.6.3 |* |
+|cpp11 |NA |0.5.0 |* |
+|curl |NA |6.0.1 |* |
+|data.table |NA |1.16.2 |* |
+|glue |NA |1.8.0 |* |
+|jsonlite |NA |1.8.9 |* |
+|openssl |NA |2.2.2 |* |
+|Rcpp |NA |1.0.13-1 |* |
+|sys |NA |3.4.3 |* |
+|withr |NA |3.0.2 |* |
+
+# Revdeps
+
diff --git a/revdep/cran.md b/revdep/cran.md
new file mode 100644
index 00000000..782ef684
--- /dev/null
+++ b/revdep/cran.md
@@ -0,0 +1,7 @@
+## revdepcheck results
+
+We checked 1 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package.
+
+ * We saw 0 new problems
+ * We failed to check 0 packages
+
diff --git a/revdep/failures.md b/revdep/failures.md
new file mode 100644
index 00000000..9a207363
--- /dev/null
+++ b/revdep/failures.md
@@ -0,0 +1 @@
+*Wow, no problems at all. :)*
\ No newline at end of file
diff --git a/revdep/problems.md b/revdep/problems.md
new file mode 100644
index 00000000..9a207363
--- /dev/null
+++ b/revdep/problems.md
@@ -0,0 +1 @@
+*Wow, no problems at all. :)*
\ No newline at end of file
diff --git a/vignettes/Bioactivity.Rmd b/vignettes/Bioactivity.Rmd
index a70e023c..fb29b130 100644
--- a/vignettes/Bioactivity.Rmd
+++ b/vignettes/Bioactivity.Rmd
@@ -54,13 +54,13 @@ registerS3method(
```
-
+
# Introduction
In this vignette, [CTX Bioactivity API](https://api-ccte.epa.gov/docs/bioactivity.html) will be explored.
-Data provided by the API's Bioactivity endpoints are sourced from ToxCast's invitrodb.
+Data provided by the API's Bioactivity endpoints are sourced from ToxCast's [invitrodb](https://doi.org/10.23645/epacomptox.6062623.v13).
US EPA's Toxicity Forecaster (ToxCast) program makes *in vitro* medium- and high-throughput screening assay data publicly available for prioritization and hazard characterization of thousands of chemicals.
@@ -80,7 +80,7 @@ The Bioactivity API endpoints are organized into two different resources, "Assay
"Data" resource endpoints are split into summary data (by 'aeid') and bioactivity data by 'm4id' (i.e. both 'aeid' and 'spid'). The summary endpoint returns the number of active hits and total multi- and single-concentration chemicals tested for specific 'aeids'. The other endpoints return chemical information, level 3 concentration-response values, level 4 fit parameters, level 5 hit parameters, and level 6 flags for individual chemicals tested for given 'AEIDs', 'm4ids', 'SPIDs', or 'DTXSIDs'.
-Several ctxR functions can be used to access the CTX Bioactivity API data, as described in the following sections. Tables output in each example have been filtered to only display the first few rows of data. Regular ToxCast users may find it easier to use the [tcpl R Package](https://CRAN.R-project.org/package=tcpl), which is integrated ctxR's bioactivity functions to access API data in a more 'invitrodb'-like format. See the [tcpl's Data Retrieval via API](https://CRAN.R-project.org/package=tcpl) vignette for more guidance on data retrieval and plotting capabilities with tcpl.
+Several ctxR functions can be used to access the CTX Bioactivity API data, as described in the following sections. Tables output in each example have been filtered to only display the first few rows of data. Regular ToxCast users may find it easier to use the [tcpl R Package](https://CRAN.R-project.org/package=tcpl), which has integrated ctxR's bioactivity functions to access API data in a more 'invitrodb'-like format. See the [tcpl's Data Retrieval via API](https://CRAN.R-project.org/package=tcpl) vignette for more guidance on data retrieval and plotting capabilities with tcpl.
::: {.noticebox data-latex=""}
**NOTE:** Please see the introductory vignette for an overview of the *ctxR* package and initial set up instruction with API key storage.
diff --git a/vignettes/Chemical.Rmd b/vignettes/Chemical.Rmd
index b80763d3..48a09085 100644
--- a/vignettes/Chemical.Rmd
+++ b/vignettes/Chemical.Rmd
@@ -63,7 +63,7 @@ registerS3method(
## Introduction
-
+
In this vignette, [CTX Chemical API](https://api-ccte.epa.gov/docs/chemical.html) will be explored.
@@ -71,10 +71,10 @@ The foundation of toxicology, toxicokinetics, and exposure is embedded in the ph
With cheminformatics as the backbone for research efforts, sources of available data through the CTX Chemical API include:
-- Chemical structures, nomenclature, synonyms, IDs, list associations, physicochemical property, environmental fate and transport data from the Distributed Structure-Searchable Toxicity ([DSSTox](https://www.epa.gov/comptox-tools/distributed-structure-searchable-toxicity-dsstox-database)) database. For early references, see [(Richard, A. et al. 2002)](https://doi.org/10.1016/S0027-5107(01)00289-5), [(Richard, A. et al. 2006)](https://files.toxplanet.com/cpdb/pdfs/structure_tox_on_web.pdf), and [(Richard, A. et al 2008)](https://doi.org/10.1080/15376510701857452).
+- Chemical structures, nomenclature, synonyms, IDs, list associations, physicochemical property, environmental fate and transport data from the Distributed Structure-Searchable Toxicity ([DSSTox](https://www.epa.gov/comptox-tools/distributed-structure-searchable-toxicity-dsstox-database)) database. DSSTox substance identifiers (DTXSIDs) support linking chemical information to a specific chemical across a variety of EPA chemical resources. For early references, see [(Richard, A. et al. 2002)](https://doi.org/10.1016/S0027-5107(01)00289-5), [(Richard, A. et al. 2006)](https://files.toxplanet.com/cpdb/pdfs/structure_tox_on_web.pdf), and [(Richard, A. et al. 2008)](https://doi.org/10.1080/15376510701857452).
- Predictions from Toxicity Estimation Software Tool ([TEST](https://www.epa.gov/comptox-tools/toxicity-estimation-software-tool-test)) suite of QSAR models. For early references, see [(Martin, T. et al. 2001)](https://pubs.acs.org/doi/10.1021/tx0155045), [(Martin, T. et al. 2007)](https://doi.org/10.1080/15376510701857353), and [(Young, D. et al. 2008)]( https://doi.org/10.1002/qsar.200810084).
-More information on Chemicals and Chemistry Data can be found here: .
+More information on Chemicals and Chemistry Data can be found here: .
::: {.noticebox data-latex=""}
**NOTE:** Please see the introductory vignette for an overview of the *ctxR* package and initial set up instruction with API key storage.
@@ -180,7 +180,7 @@ search_contains <- chemical_contains(word = 'DTXSID702018')
## Subset for MS-Ready Structures
-MS-Ready data can be retrieved using a variety of input information. Examples for each are provided below:
+MS-Ready [(McEachran, A. et al. 2018)](https://doi.org/10.1186/s13321-018-0299-2) data can be retrieved using a variety of input information. Examples for each are provided below:
### By Mass Range
@@ -203,7 +203,7 @@ msready_by_dtxcid <- get_msready_by_dtxcid(DTXCID = 'DTXCID30182')
# List Resource
-There are several lists of chemicals one can access. These can be filtered by the type, name, inclusion of a specific chemical, or name of list.
+There are several lists of chemicals one can access using the [(CCD list search)](https://comptox.epa.gov/dashboard/chemical-lists). These can be filtered by the type, name, inclusion of a specific chemical, or name of list.
## Get all list types
@@ -266,7 +266,7 @@ chemicals_in_list <- get_chemicals_in_list(list_name = 'CCL4')
# Chemical File Resource
-There a mrv, mol, and image files that can be accessed using either the DTXSID or DTXCID. Examples are provided below:
+There are mrv, mol, and image files that can be accessed using either the DTXSID or DTXCID. Examples are provided below:
## Get mrv by DTXSID or DTXCID
@@ -314,7 +314,7 @@ The fourth Drinking Water Contaminant Candidate List (CCL4) is a set of chemical
These lists can be found in the CCD at [CCL4](https://comptox.epa.gov/dashboard/chemical-lists/CCL4) with additional information at [CCL4 information](https://www.epa.gov/ccl/contaminant-candidate-list-4-ccl-4-0) and [NATADB](https://comptox.epa.gov/dashboard/chemical-lists/NATADB) with additional information at [NATA information](https://www.epa.gov/national-air-toxics-assessment). The quotes from the previous paragraph were excerpted from list detail descriptions found using the CCD links.
-In this example use case, physico-chemical Properties data will be compared between a water contaminant priority and an air toxics list.
+In this example use case, physico-chemical properties data will be compared between a water contaminant priority and an air toxics list. Note, the following code chunks use the `data.table` object, which is an extension of the `data.frame` object and has slightly different syntax. For more information, please refer to [data.table](https://CRAN.R-project.org/package=data.table).
## Obtain Lists of Chemicals
@@ -385,20 +385,20 @@ ccl4_phys_chem[propertyId == 'melting-point', .(Mean = mean(value)),
These results tell us about some of the reported physico-chemical properties of the data sets.
-The mean "boiling-point" is 251.1072 degrees Celsius for CCL4, with mean values of 250.5943 and 251.4001 for experimental and predicted, respectively. The mean "melting-point" is 33.93924 degrees Celsius for CCL4, with mean values of 23.18876 and 47.98422 for experimental and predicted, respectively.
+The mean "boiling-point" is 252.6593 degrees Celsius for CCL4, with mean values of 250.5943 and 253.8196 for experimental and predicted, respectively. The mean "melting-point" is 34.91613 degrees Celsius for CCL4, with mean values of 23.18876 and 49.99417 for experimental and predicted, respectively.
-To explore **all** the values of the physico-chemical properties and calculate their means, we can do the following procedure. First we look at all the physico-chemical properties individually, then group them by each property ("boiling-point", "melting-point", etc...), and then additionally group those by property type ("experimental" vs "predicted"). In the grouping, we look at the columns `value`, `unit`, `propertyID` and `propType`. We also demonstrate how take the mean of the values for each grouping.
+To explore **all** the values of the physico-chemical properties and calculate their means, we can do the following procedure. First we look at all the physico-chemical properties individually, then group them by each property ("boiling-point", "melting-point", etc...), and then additionally group those by property type ("experimental" vs "predicted"). In the grouping, we look at the columns `value`, `unit`, `propertyId` and `propType`. We also demonstrate how to take the mean of the values for each grouping, using the chemical identifier 'DTXSID1037567' for this example, the 25th chemical in CCL4.
```{r fig.align='center',class.source="scroll-300",message=FALSE}
-head(ccl4_phys_chem[dtxsid == ccl4$dtxsid[[1]], ])
-ccl4_phys_chem[dtxsid == ccl4$dtxsid[[1]], .(propType, value, unit),
+head(ccl4_phys_chem[dtxsid == ccl4$dtxsid[[25]], ])
+ccl4_phys_chem[dtxsid == ccl4$dtxsid[[25]], .(propType, value, unit),
by = .(propertyId)]
-ccl4_phys_chem[dtxsid == ccl4$dtxsid[[1]], .(value, unit),
+ccl4_phys_chem[dtxsid == ccl4$dtxsid[[25]], .(value, unit),
by = .(propertyId, propType)]
-ccl4_phys_chem[dtxsid == ccl4$dtxsid[[1]], .(Mean_value = sapply(.SD, mean)),
+ccl4_phys_chem[dtxsid == ccl4$dtxsid[[25]], .(Mean_value = sapply(.SD, mean)),
by = .(propertyId, unit), .SDcols = c("value")]
-ccl4_phys_chem[dtxsid == ccl4$dtxsid[[1]], .(Mean_value = sapply(.SD, mean)),
+ccl4_phys_chem[dtxsid == ccl4$dtxsid[[25]], .(Mean_value = sapply(.SD, mean)),
by = .(propertyId, unit, propType),
.SDcols = c("value")][order(propertyId)]
```
diff --git a/vignettes/Exposure.Rmd b/vignettes/Exposure.Rmd
index d314ba52..7b6d578e 100644
--- a/vignettes/Exposure.Rmd
+++ b/vignettes/Exposure.Rmd
@@ -54,19 +54,21 @@ registerS3method(
)
```
-
+
## Introduction
In this vignette, the [CTX Exposure API](https://api-ccte.epa.gov/docs/exposure.html) will be explored.
-Data provided by the Exposure API are broadly organized in three different areas, Functional Use Information, Product Data, and List Presence Data. These data (except for the Functional Use Probability endpoint) are developed from publicly available documents and are also accessible using the Chemical Exposure Knowledgebase ([ChempExpo](https://comptox.epa.gov/chemexpo/)) interactive web application developed by the United States Environmental Protection Agency. The underlying database for both the Exposure API and ChemExpo is the Chemicals and Products Database (CPDat). CPDat provides reported information on how chemicals are used in commerce and (where possible) at what quantities they occur in consumer and industrial products; see [Dionisio et al. (2018)](https://www.nature.com/articles/sdata2018125) for more information on CPDat. The data provided by the Functional Use Probability endpoint are predictions from EPA's Quantitative Structure Use Relationship (QSUR) models [Phillips et al. (2017)](https://pubs.rsc.org/en/content/articlelanding/2017/gc/c6gc02744j).
+Data provided by the Exposure API are broadly organized in four different areas: Functional Use Information, Product Data, List Presence Data, and Exposure estimates. Data from the Functional Use, Product Data, and List Presence resources (aside from the Functional Use Probability endpoint) are developed from publicly available documents and are also accessible using the Chemical Exposure Knowledgebase ([ChemExpo](https://comptox.epa.gov/chemexpo/)) interactive web application developed by the United States Environmental Protection Agency. The underlying database for the Functional Use, Product Data, and List Presence endpoints of the Exposure API and ChemExpo is the Chemicals and Products Database (CPDat). CPDat provides reported information on how chemicals are used in commerce and (where possible) at what quantities they occur in consumer and industrial products; see [(Dionisio et al. 2018)](https://www.nature.com/articles/sdata2018125) for more information on CPDat. The data provided by the Functional Use Probability endpoint are predictions from EPA's Quantitative Structure Use Relationship (QSUR) models [(Phillips et al. 2017)](https://pubs.rsc.org/en/content/articlelanding/2017/gc/c6gc02744j). Exposure data is represented by predictions from the [`httk`](https://CRAN.R-project.org/package=httk) R package, introduced in [(Pearce, R. et al. 2017)](https://doi.org/10.18637/jss.v079.i04) and several exposure models including the SEEM models. Information on the SEEM2 model can be found at [(Wambaugh, J. et al. 2014)](http://dx.doi.org/10.1021/es503583j) and on the SEEM3 model can be found at [(Ring, C. et al. 2018)](http://dx.doi.org/10.1021/acs.est.8b04056).
-Product Data are organized by harmonized Product Use Categories (PUCs). The PUCs are assigned to products (which are associated with Composition Documents) and indicate the type of product associated to each data record. They are organized hierarchicially, with General Category containing Product Family, which in turn contains Product Type. The Exposure API also provide information on how the PUC was assigned. Do note that a Machine Learning model is used to assign PUCs with the "classificationmethod" equal to "Automatic". As such, these assignments may be incorrect. More information on PUC categories can be found in [Isaacs et al. (2020)](https://doi.org/10.1038/s41370-019-0187-5).
+Product Data are organized by harmonized Product Use Categories (PUCs). The PUCs are assigned to products (which are associated with Composition Documents) and indicate the type of product associated with each data record. They are organized hierarchically, with General Category containing Product Family, which in turn contains Product Type. The Exposure API also provides information on how the PUC was assigned. Do note that a natural language processing model is used to assign PUCs with the "classificationmethod" equal to "Automatic". As such, these assignments are less certain and may contain inaccuracies. More information on PUC categories can be found in [(Isaacs et al. 2020)](https://doi.org/10.1038/s41370-019-0187-5).
-List Presence Data reflect the occurrence of chemicals on lists present in publicly available documents (sourced from a variety of federal and state agencies and trade associations). These lists are tagged with List Presence Keywords (LPKs) that together describe information contained in the document relevant to how the chemical was used. LPKs are an updated version of the cassettes provided in the Chemical and Product Categories (CPCat) database; see [Dionisio et al. (2015)](https://www.sciencedirect.com/science/article/pii/S2214750014001632?via%3Dihub). For the most up to date information on the current LPKs and to see how the CPCat cassettes were updated, see [Koval et al. (2022)](https://www.nature.com/articles/s41370-022-00451-8).
+List Presence Data reflect the occurrence of chemicals on lists present in publicly available documents (sourced from a variety of federal and state agencies and trade associations). These lists are tagged with List Presence Keywords (LPKs) that together describe information contained in the document relevant to how the chemical was used. LPKs are an updated version of the cassettes provided in the Chemical and Product Categories (CPCat) database; see [(Dionisio et al. 2015)](https://doi.org/10.1016/j.toxrep.2014.12.009). For the most up to date information on the current LPKs and to see how the CPCat cassettes were updated, see [(Koval et al. 2022)](https://www.nature.com/articles/s41370-022-00451-8).
-Both reported and predicted Function Use Information is available. Reported functional use information is organized by harmonized Function Categories (FCs) that describe the role a chemical serves in a product or industrial process. The harmonized technical function categories and definitions were developed by the Organization for Economic Co-operation and Development (OECD) (with the exception of a few categories unique to consumer products which are noted as being developed by EPA). These categories have been augmented with additional categories needed to describe chemicals in personal care, pharmaceutical, or other commercial sectors. The reported function data form the basis for ORD's QSUR models [(Phillips et al. (2016))](https://pubs.rsc.org/en/content/articlelanding/2017/GC/C6GC02744J). These models provide the structure-based predictions of chemical function available in the Functional Use Probability endpoint. Note that these models were developed prior to the OECD function categories, so their function categories are not yet aligned with the harmonized categories used in the reported data. Updated models for the harmonized categories are under development.
+Both reported and predicted Functional Use Information is available. Reported functional use information is organized by harmonized Function Categories (FCs) that describe the role a chemical serves in a product or industrial process. The harmonized technical function categories and definitions were developed by the Organisation for Economic Co-operation and Development (OECD) (with the exception of a few categories unique to consumer products which are noted as being developed by EPA). These categories have been augmented with additional categories needed to describe chemicals in personal care, pharmaceutical, or other commercial sectors. The reported function data form the basis for ORD's QSUR models [(Phillips et al. 2017)](https://pubs.rsc.org/en/content/articlelanding/2017/GC/C6GC02744J). These models provide the structure-based predictions of chemical function available in the Functional Use Probability endpoint. Note that these models were developed prior to the OECD function categories, so their function categories are not yet aligned with the harmonized categories used in the reported data. Updated models for the harmonized categories are under development.
+
+The R package `httk` provides users with a variety of tools to incorporate toxicokinetics and in vitro-in vivo extrapolation into bioinformatics and comes with pre-made models that can be used with specific chemical data. The SEEM models were developed to provide predictions for potential human exposure to chemicals with little or no exposure data. For SEEM2, Bayesian methods were used to infer ranges of exposure consistent with data from the National Health and Nutrition Examination Survey. Predictions for different demographic groups were made. For SEEM3, chemical exposures through four different pathways were predicted and in turn weighting of different models through these exposure pathways was conducted to produce consensus predictions.
Information for ChemExpo is sourced from: Sakshi Handa, Katherine A. Phillips, Kenta Baron-Furuyama, and Kristin K. Isaacs. 2023. “ChemExpo Knowledgebase User Guide”. https://comptox.epa.gov/chemexpo/static/user_guide/index.html.
@@ -80,9 +82,9 @@ Several ctxR functions can be used to access the CTX Exposure API data, as descr
Functional uses for chemicals may be searched.
-## Exposure Functional Use
+## Functional Use
-`get_exposure_functional_use()` retrieves FCs and associated exposure data for a specific chemical (by DTXSID).
+`get_exposure_functional_use()` retrieves FCs and associated metadata for a specific chemical (by DTXSID).
```{r exposure functional use}
exp_fun_use <- get_exposure_functional_use(DTXSID = 'DTXSID7020182')
@@ -94,9 +96,9 @@ knitr::kable(head(exp_fun_use)) %>%
kableExtra::scroll_box(width = "100%")
```
-## Exposure Functional Use Probability
+## Functional Use Probability
-`get_exposure_functional_use_probability()` retrieves the probability of functional use within different FCs for a given chemical (by DTXSID). Note, this is not probability of how the chemical is used across all categories but rather the probability within each FC that the chemical is used.
+`get_exposure_functional_use_probability()` retrieves the probability of functional use within different FCs for a given chemical (by DTXSID). Each value represents the probability of the chemical being classified as having this function, as predicted by the QSUR models.
```{r}
exp_fun_use_prob <- get_exposure_functional_use_probability(DTXSID = 'DTXSID7020182')
@@ -106,9 +108,9 @@ exp_fun_use_prob <- get_exposure_functional_use_probability(DTXSID = 'DTXSID7020
knitr::kable(head(exp_fun_use_prob))
```
-## Exposure Functional Use Categories
+## Functional Use Categories
-`get_exposure_functional_use_categories()` retrieves all the FCs. This is not specific to a chemical, but rather a list of all FCs.
+`get_exposure_functional_use_category()` retrieves definitions of all the available FCs. This is not specific to a chemical, but rather a list of all FCs.
```{r}
exp_fun_use_cat <- get_exposure_functional_use_category()
@@ -118,20 +120,12 @@ exp_fun_use_cat <- get_exposure_functional_use_category()
knitr::kable(head(exp_fun_use_cat))
```
-### `httk` data
-
-There is a single resource that returns `httk` model data when available
-
-```{r}
-bpa_httk <- get_httk_data(DTXSID = 'DTXSID7020182')
-head(bpa_httk)
-```
# Product Data Resource
There are a few resources for retrieving product use data associated with chemical identifiers (DTXSID) or general use.
-## Exposure Product Data
+## Product Data
`get_exposure_product_data()` retrieves the product data (PUCs and related data) for products that use the specified chemical (by DTXSID).
@@ -145,9 +139,9 @@ knitr::kable(head(exp_prod_dat))%>%
kableExtra::scroll_box(width = "100%")
```
-## Exposure Product Use Category Data
+## Product Use Category Data
-`get_exposure_product_data_puc()` retrieves the PUCs. This is not specific to a chemical, but rather a list of all PUCs.
+`get_exposure_product_data_puc()` retrieves the definitions of all the PUCs. This is not specific to a chemical, but rather a list of all PUCs.
```{r}
exp_prod_data_puc <- get_exposure_product_data_puc()
@@ -157,13 +151,23 @@ exp_prod_data_puc <- get_exposure_product_data_puc()
knitr::kable(head(exp_prod_data_puc))
```
+# `httk` data
+
+There is a single resource that returns `httk` model data when available.
+
+```{r}
+bpa_httk <- get_httk_data(DTXSID = 'DTXSID7020182')
+head(bpa_httk)
+```
+
+
# List Presence Resource
There are a few resources for retrieving list data for specific chemicals (by DTXSID) or general list presence information.
## List Presence Tags
-`get_exposure_list_presence_tags()` retrieves all the list presence tag information (including LPKs). This is not specific to a chemical, but rather a list of the the list presence tags.
+`get_exposure_list_presence_tags()` retrieves all the list presence keywords. This is not specific to a chemical, but rather a list of the list presence keywords. Note that some List Presence Keywords align with PUCs, but the keywords are assigned to documents that refer to a product category as a whole, while PUCs are assigned to documents referring to specific products (e.g., ingredient lists).
```{r}
exp_list_tags <- get_exposure_list_presence_tags()
@@ -188,11 +192,11 @@ knitr::kable(head(exp_list_tags_dat))%>%
```
-### Exposure predictions
+### Exposure Predictions
-There are two functions that provide access to exposure prediction data. The first provides general information while the second provides information broken down by different demographic groups.
+There are two functions that provide access to exposure prediction data. The first provides general information on exposure pathways while the second provides exposure predictions from a variety of exposure models. The general information corresponds to SEEM3 predictions of exposure pathways, while the exposure predictions feature SEEM2 predictions broken down by demographic groups, general consensus predictions from SEEM3, and in some cases additional exposure predictions from other models.
-#### General exposure predictions
+#### General Exposure Predictions
`get_general_exposure_prediction()` returns general exposure information for a given chemical.
@@ -200,7 +204,7 @@ There are two functions that provide access to exposure prediction data. The fir
bpa_general_exposure <- get_general_exposure_prediction(DTXSID = 'DTXSID7020182')
head(bpa_general_exposure)
```
-#### Demographic exposure predictions
+#### Demographic Exposure Predictions
`get_demographic_exposure_prediction()` returns exposure prediction information split across different demographics for a given chemical.
@@ -215,7 +219,7 @@ bpa_demographic_exposure
There are batch search versions for several endpoints that gather data specific to a chemical. Namely, `get_exposure_functional_use_batch()`, `get_exposure_functional_use_probability()`, `get_exposure_product_data_batch()`, `get_exposure_list_presence_tags_by_dtxsid_batch()`, `get_general_exposure_prediction_batch()`, and `get_demographic_exposure_prediction_batch()`. The function `get_exposure_functional_use_probability()` returns a data.table with each row corresponding to a unique chemical and each column representing a functional use category associated to at least one input chemical. The other batch functions return a named list of data.frames or data.tables, the names corresponding to the unique chemicals input and the data.frames or data.tables corresponding to the information to each individual chemical.
-## Functional use probability batch
+## Functional Use Probability Batch
We demonstrate how the individual results differ from the batch results when retrieving functional use probabilities.
diff --git a/vignettes/Hazard.Rmd b/vignettes/Hazard.Rmd
index 173dc7c3..1b450e0b 100644
--- a/vignettes/Hazard.Rmd
+++ b/vignettes/Hazard.Rmd
@@ -53,7 +53,7 @@ registerS3method(
)
```
-
+
## Introduction
@@ -137,7 +137,7 @@ In this example use case, hazard data will be compared between a water contamina
## Obtain Lists of Chemicals
-First, confirm the chemical list to query.
+First, confirm the chemical list to query. We use functions that wrap some of the Chemical domain endpoints to retrieve information about the list of chemicals.
```{r}
options(width = 100)
@@ -159,7 +159,7 @@ natadb <- data.table::as.data.table(natadb)
## Review Genotoxicity Data for a Single Chemical
-Using the standard CompTox Chemicals Dashboard approach to access genotoxicity hazard data, one would navigate to the individual chemical page as shown below.
+Using the standard CompTox Chemicals Dashboard approach to access genotoxicity hazard data, one would navigate to the individual chemical page for [DTXSID7020182](https://comptox.epa.gov/dashboard/chemical/genotoxicity/DTXSID7020182) as shown below.
@@ -198,7 +198,7 @@ colnames(ccl4_genotox)
head(ccl4_genotox)
```
-The information returned is of the first variety highlighted in the Figure 2, that is, summary data on the available genotoxicity data for each chemical. Observe genotoxicity data was returned for 71 chemicals from the CCL4 chemical list and 153 from the NATA chemical list. Chemicals missing genotoxicity information are noted.
+The information returned is of the first variety highlighted in Figure 2, that is, summary data on the available genotoxicity data for each chemical. Observe that genotoxicity data was returned for 71 chemicals from the CCL4 chemical list and 153 from the NATA chemical list. Chemicals missing genotoxicity data for each list are noted below.
```{r fig.align='center',class.source="scroll-300",message=FALSE}
ccl4[!(dtxsid %in% ccl4_genotox$dtxsid),
diff --git a/vignettes/Introduction.Rmd b/vignettes/Introduction.Rmd
index d61e7f2a..ab061829 100644
--- a/vignettes/Introduction.Rmd
+++ b/vignettes/Introduction.Rmd
@@ -52,7 +52,11 @@ registerS3method(
)
```
-
+
+
+# Computational Toxicology and Exposure data
+
+In this document, users will be introduced to different methods to access computational toxicology and exposure data. The two main methods described are through the CompTox Chemicals Dashboard and the CTX APIs, via the R package *ctxR*. References to papers and additional information on resources can be found within the exposition of this document. For more detailed information on specific areas of data, please refer to the other vignettes included within *ctxR*.
# Introduction to the CCD
@@ -96,7 +100,7 @@ The web interface for batch search only allows input of 10,000 identifiers at a
## Challenges of web-based dashboard search
-Practicing researchers may follow a workflow that looks something like this:
+Practicing researchers who are evaluating many chemicals may follow a workflow that looks something like this:
1. Start with a dataset that includes your chemical identifiers of interest. These may include chemical names, Chemical Abstract Service Registry Numbers (CASRNs), Distributed Searchable Structure-Toxicity Database (DSSTox) identifiers, or InChIKeys.
2. Export the chemical identifiers to a spreadsheet. Often, this is done by importing the data into an environment such as R or Python, in order to do some data wrangling (e.g., to select only the unique substance identfiers; to clean up improperly-formatted CASRNs; etc.). Then, the identifiers are saved in a spreadsheet (an Excel, .csv, or .txt file), one chemical identifier per row.
@@ -136,7 +140,7 @@ On the left side of each domain's web interface page, there will be several diff
## Authentication
-`Authentication`, found in upper left tab on each web interface page, is required to use the APIs. To authenticate themselves in the API web interface, the user must input their unique API key.
+`Authentication`, found in the upper left tab on each web interface page, is required to use the APIs. To authenticate themselves in the API web interface, the user must input their unique API key. To request an API key, please contact the API support team at [ccte_api@epa.gov](mailto:ccte_api@epa.gov).
@@ -240,7 +244,7 @@ ctx_key()
# Quick Start Examples
-As some quick start examples, we demonstrate the ease* of retrieving the information across endpoints for Bisphenol A using *ctxR*. *This is in contrast to the approach using the CCD or API web interface.
+As some quick start examples, we demonstrate the relative ease (compared to using the CCD or API web interface) of retrieving the information across endpoints for Bisphenol A using *ctxR*.
Tables output in each example have been filtered to only display the first few rows of data. For additional examples and more comprehensive documentation on each endpoint, consider reviewing the other ctxR vignettes for the data domain of interest.