Boehringer-Ingelheim · christopher-mohr · Jan 23, 2025 · Jan 23, 2025 · Jan 24, 2025 · Jan 24, 2025
diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml
@@ -0,0 +1,52 @@
+name: "Build docs using pkgdown"
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    # 'closed' is included so the preview step can clean up the PR's directory on GitHub Pages
+    types:
+      - opened
+      - reopened
+      - synchronize
+      - closed
+
+concurrency: preview-${{ github.ref }}  # only one run per ref at a time
+
+jobs:
+  docs:
+    name: "Build Docs"
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::pkgdown, local::.
+          needs: website
+
+      - name: Build site
+        run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
+        shell: Rscript {0}  # the `run` line above is R code, not shell
+
+      - name: Deploy preview
+        if: github.event_name == 'pull_request'
+        uses: rossjrw/pr-preview-action@v1
+        with:
+          source-dir: docs
+
+      - name: Deploy docs
+        if: github.event_name != 'pull_request'
+        uses: JamesIves/github-pages-deploy-action@v4
+        with:
+          folder: docs
+          branch: gh-pages
+          clean-exclude: pr-preview/  # do not wipe PR previews when deploying the main docs
+          force: false
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -0,0 +1,53 @@
+# The purpose of this workflow is to execute `R CMD check` on different R and OS versions.
+#
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+name: R-CMD-check
+
+jobs:
+  R-CMD-check:
+    runs-on: ${{ matrix.config.os }}
+
+    name: ${{ matrix.config.os }} (${{ matrix.config.r }})
+
+    strategy:
+      fail-fast: false  # keep the other matrix jobs running when one of them fails
+      matrix:
+        config:
+          - { os: macOS-latest, r: "release" }
+          - { os: windows-latest, r: "release" }
+          - { os: ubuntu-latest, r: "devel", http-user-agent: "release" }
+          - { os: ubuntu-latest, r: "release" }
+
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      R_KEEP_PKG_SOURCE: yes
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: ${{ matrix.config.r }}
+          http-user-agent: ${{ matrix.config.http-user-agent }}
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::rcmdcheck
+          needs: check
+
+      - uses: r-lib/actions/check-r-package@v2
+        with:
+          # TODO: vignettes cannot be built/checked for now, since we don't have any publicly available example data
+          args: c("--no-build-vignettes", "--ignore-vignettes", "--no-manual")
+          build_args: c("--no-build-vignettes", "--ignore-vignettes", "--no-manual")
+          upload-snapshots: true
diff --git a/.lintr b/.lintr
@@ -0,0 +1,6 @@
+linters: linters_with_defaults(
+    object_length_linter = NULL,
+    object_usage_linter = NULL,
+    line_length_linter(120),
+    commented_code_linter = NULL
+    )
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,25 +1,31 @@
 Package: tso500R
 Type: Package
-Title: Parser For TSO500 Analysis Output Files 
-Version: 0.1.0
+Title: TSO500R(eader)
+Version: 0.2
 Authors@R: 
     c(person("Christopher", "Mohr", email = "[email protected]", role = c("aut", "cre")),
       person("Alexander", "Peltzer", email = "[email protected]", role = c("ctb")),
       person("Sophie", "Paul", email = "[email protected]", role = c("aut")))
-Description: TSO500R(eader) is an R package developed for Illumina TSO500 data. It can be used for importing and processing of files produced by the Illumina TSO500 DRAGEN analysis pipeline and the LocalApp.
-  The package provides different functions for parsing the various output files produced by the Illumina pipelines. This includes the quality control files as well as the analysis outputs. Besides, it offers functionality to integrate the different result types, e.g. small variants and amplifications. 
+Description: TSO500R(eader) is an R package developed for handling Illumina TruSight Oncology 500 data. It can be used to import and process files produced by the Illumina TSO500 DRAGEN analysis pipeline and by the LocalApp provided by Illumina.
+  The package provides different functions for parsing the various output files produced by the Illumina pipelines. This includes quality control files as well as the analysis outputs. In addition, it offers functionality to integrate the different result types, e.g. small variants and amplifications.
   Other features include functions for basic plotting and export functionality for writing `RData` objects or to generate DRAGEN analysis pipeline samplesheets.
-License: MIT
+License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.2.3
-Depends:
+Imports:
+  ComplexHeatmap,
   dplyr,
   ggplot2,
+  gt,
   janitor,
-  magrittr,
+  jsonlite,
   openxlsx,
   purrr,
   readr,
+  readxl,
+  rlang,
   stringr,
-  tidyr
+  tibble,
+  tidyr,
+  vcfR
diff --git a/NAMESPACE b/NAMESPACE
@@ -21,8 +21,8 @@ export(cnv)
 export(cvo)
 export(filter_consequences)
 export(filter_depth)
-export(filter_for_Included_in_TMB)
 export(filter_for_cosmic_id)
+export(filter_for_included_in_tmb)
 export(filter_germline_db)
 export(filter_germline_proxi)
 export(generate_dragen_samplesheet)
@@ -45,6 +45,7 @@ export(get_small_variants)
 export(get_splice_variants)
 export(keep_consequences)
 export(make_qc_table)
+export(parse_illumina_samplesheet)
 export(parse_p_dot_notation)
 export(plot_af_density)
 export(plot_af_histogram)
@@ -78,15 +79,71 @@ export(tmb)
 export(write_multiqc_data)
 export(write_rdata_file)
 export(write_workbook)
+importFrom(dplyr,across)
+importFrom(dplyr,bind_cols)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,case_when)
+importFrom(dplyr,coalesce)
 importFrom(dplyr,distinct)
+importFrom(dplyr,filter)
+importFrom(dplyr,full_join)
+importFrom(dplyr,group_by)
+importFrom(dplyr,if_else)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
+importFrom(dplyr,mutate_all)
+importFrom(dplyr,relocate)
 importFrom(dplyr,rename)
 importFrom(dplyr,select)
+importFrom(dplyr,summarise)
+importFrom(dplyr,tibble)
+importFrom(ggplot2,element_blank)
+importFrom(ggplot2,element_rect)
+importFrom(ggplot2,element_text)
+importFrom(ggplot2,theme)
+importFrom(gt,data_color)
+importFrom(gt,gt)
+importFrom(janitor,clean_names)
+importFrom(janitor,make_clean_names)
+importFrom(jsonlite,read_json)
+importFrom(openxlsx,addWorksheet)
+importFrom(openxlsx,createWorkbook)
+importFrom(openxlsx,saveWorkbook)
+importFrom(openxlsx,writeDataTable)
+importFrom(purrr,discard)
+importFrom(purrr,map)
+importFrom(purrr,map_chr)
+importFrom(purrr,map_dfr)
+importFrom(purrr,reduce2)
+importFrom(purrr,set_names)
+importFrom(purrr,walk)
+importFrom(readr,format_csv)
+importFrom(readr,read_csv)
+importFrom(readr,read_file)
+importFrom(readr,read_tsv)
+importFrom(rlang,.data)
+importFrom(stats,na.omit)
+importFrom(stringr,str_detect)
+importFrom(stringr,str_extract)
+importFrom(stringr,str_length)
+importFrom(stringr,str_remove)
+importFrom(stringr,str_remove_all)
+importFrom(stringr,str_replace)
+importFrom(stringr,str_replace_all)
+importFrom(stringr,str_split)
 importFrom(stringr,str_split_i)
+importFrom(stringr,str_sub)
+importFrom(tibble,add_column)
+importFrom(tibble,tibble)
+importFrom(tidyr,everything)
+importFrom(tidyr,extract)
+importFrom(tidyr,pivot_longer)
 importFrom(tidyr,pivot_wider)
 importFrom(tidyr,replace_na)
+importFrom(tidyr,unnest)
+importFrom(tidyr,unnest_longer)
+importFrom(tidyr,unnest_wider)
+importFrom(utils,read.table)
+importFrom(utils,write.table)
 importFrom(vcfR,read.vcfR)
 importFrom(vcfR,vcfR2tidy)
diff --git a/R/annotations.R b/R/annotations.R
@@ -6,7 +6,7 @@
 #' @return list of data.frame
 #' @export
 read_annotation_data <- function(annotation_data_path, sheet_names){
-  annotations <- map(sheet_names, ~ readxl::read_excel(annotation_data_path, sheet = .x) %>% janitor::clean_names())
+  annotations <- map(sheet_names, ~ readxl::read_excel(annotation_data_path, sheet = .x) |> janitor::clean_names())
   names(annotations) <- sheet_names
   return(annotations)
 }
diff --git a/R/cnv.R b/R/cnv.R
@@ -3,28 +3,31 @@
 #' @description Read in a *CopyNumberVariants.vcf file
 #'
 #' @param cnv_file_path a file path to a *CopyNumberVariants.vcf file
+#' @param local_app specifies whether the data is coming from local app
 #'
 #' @return A cnv.output object
 #' 
 #' @export
-cnv <- function(cnv_file_path, local_app=FALSE){
-  new_cnv_output(cnv_file_path)
+cnv <- function(cnv_file_path, local_app = FALSE) {
+  new_cnv_output(cnv_file_path, local_app = local_app)  # forward the flag instead of dropping it
 }
 
 #' Constructor function for combined.cnv.output objects
 #' Not to be called directly
 #'
 #' @param cnv_file_path a file path to a *CopyNumberVariants.vcf file
-#' @param local_app specifies whether quality metrics are coming from local app
+#' @param local_app specifies whether the data is coming from local app
 #'
 #' @return A combined.cnv.output object
-new_cnv_output <- function(cnv_file_path, local_app=FALSE) {
+#'
+#' @importFrom dplyr tibble
+new_cnv_output <- function(cnv_file_path, local_app = FALSE) {
 
-  cnv_data = tibble(file = cnv_file_path) %>%
-    mutate(data = lapply(file, parse_vcf_to_df)) %>%
-    unnest(data) %>%
-    mutate(sample_id = str_replace(basename(file), "_CopyNumberVariants.vcf", "")) %>%
-    select(-file) %>%
+  cnv_data <- tibble(file = cnv_file_path) |>
+    mutate(data = lapply(file, parse_vcf_to_df)) |>
+    unnest(data) |>
+    mutate(sample_id = str_replace(basename(file), "_CopyNumberVariants\\.vcf$", "")) |>  # literal, anchored suffix
+    select(-file) |>
     relocate(sample_id)
 
   return(structure(cnv_data, class = "combined.cnv.output"))
@@ -33,44 +36,52 @@ new_cnv_output <- function(cnv_file_path, local_app=FALSE) {
 #' Read in a batch of *CopyNumberVariants.vcf files into a list of CNV objects
 #'
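-#' @param cnv_directory a file path to a directory containing one of more *CopyNumberVariants.vcf files
+#' @param cnv_directory a file path to a directory containing one or more *CopyNumberVariants.vcf files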
-#' @param local_app specifies whether quality metrics are coming from local app
+#' @param local_app specifies whether the data is coming from local app
 #'
 #' @return A named list of combined.cnv.output objects
 #'
 #' @export
-read_cnv_data <- function(cnv_directory, local_app=FALSE){
+#'
+#' @importFrom purrr map set_names
+#' @importFrom stringr str_remove
+read_cnv_data <- function(cnv_directory, local_app = FALSE) {
   cnv_files <- list.files(
     path = cnv_directory,
     pattern = "*cnv\\.vcf$|*CopyNumberVariants\\.vcf$",
     recursive = TRUE,
     full.names = TRUE
   )
-  cnv_data <- map(cnv_files, cnv, local_app)  %>%
-    set_names(str_remove(basename(cnv_files), "\\.vcf$")) 
+  cnv_data <- map(cnv_files, cnv, local_app)  |>
+    set_names(str_remove(basename(cnv_files), "\\.vcf$"))
   cnv_data
 }
 
 #' Read in a batch of *CopyNumberVariants.vcf files into one dataframe
 #'
-#' @param tmb_directory a file path to a directory containing one of more
+#' @param cnv_directory a file path to a directory containing one or more
-#' *tmb.json files
+#' *CopyNumberVariants.vcf files
 #'
 #' @return A dataframe with the read CNV data
-#' 
+#'
 #' @export
-summarize_cnv_data <- function(cnv_directory){
+#'
+#' @importFrom tibble tibble
+#' @importFrom stringr str_replace
+#' @importFrom tidyr unnest
+#' @importFrom dplyr relocate mutate
+summarize_cnv_data <- function(cnv_directory) {
   cnv_files <- list.files(
     path = cnv_directory,
     pattern = "*cnv\\.vcf$|*CopyNumberVariants\\.vcf$",
     recursive = TRUE,
     full.names = TRUE
   )
 
-  cnv_data = tibble(file = cnv_files) %>%
-    mutate(data = lapply(file, parse_vcf_to_df)) %>%
-    unnest(data) %>%
-    mutate(sample_id = str_replace(basename(file), "_CopyNumberVariants\\.vcf$", "")) %>%
-    select(-file) %>%
+  cnv_data <- tibble(file = cnv_files) |>
+    mutate(data = lapply(file, parse_vcf_to_df)) |>
+    unnest(data) |>
+    mutate(sample_id = str_replace(basename(file), "_CopyNumberVariants\\.vcf$", "")) |>
+    select(-file) |>
     relocate(sample_id)
 
   cnv_data