From 0c4b00dbd94426520fefc9ba77cc9a2e25ee4599 Mon Sep 17 00:00:00 2001 From: Edgar Manukyan Date: Mon, 17 Jun 2024 14:34:52 -0400 Subject: [PATCH] just testing --- vignettes/articles/algorithms.Rmd | 221 ---------- vignettes/articles/cm_raw_data.csv | 15 - vignettes/articles/dm.csv | 6 - vignettes/articles/events_domain.Rmd | 572 ------------------------- vignettes/articles/findings_domain.Rmd | 564 ------------------------ vignettes/articles/iso_8601.Rmd | 254 ----------- vignettes/articles/just-dm.Rmd | 24 ++ vignettes/articles/sdtm_ct.csv | 75 ---- vignettes/articles/study_sdtm_spec.Rmd | 361 ---------------- vignettes/articles/vitals_raw_data.csv | 7 - 10 files changed, 24 insertions(+), 2075 deletions(-) delete mode 100644 vignettes/articles/algorithms.Rmd delete mode 100644 vignettes/articles/cm_raw_data.csv delete mode 100644 vignettes/articles/dm.csv delete mode 100644 vignettes/articles/events_domain.Rmd delete mode 100644 vignettes/articles/findings_domain.Rmd delete mode 100644 vignettes/articles/iso_8601.Rmd create mode 100644 vignettes/articles/just-dm.Rmd delete mode 100644 vignettes/articles/sdtm_ct.csv delete mode 100644 vignettes/articles/study_sdtm_spec.Rmd delete mode 100644 vignettes/articles/vitals_raw_data.csv diff --git a/vignettes/articles/algorithms.Rmd b/vignettes/articles/algorithms.Rmd deleted file mode 100644 index 3854287d..00000000 --- a/vignettes/articles/algorithms.Rmd +++ /dev/null @@ -1,221 +0,0 @@ ---- -title: "Algorithms & Sub-Algorithms" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Algorithms & Sub-Algorithms} - %\VignetteEncoding{UTF-8} - %\VignetteEngine{knitr::rmarkdown} -editor_options: - markdown: - wrap: 72 ---- - -## Core Concept - -SDTM mappings are defined as algorithms that transform the collected -(eCRF, eDT) source data into the target SDTM data model. Mapping -algorithms are the backbone of the {sdtm.oak} - SDTM data transformation -engine. - -**Key Points:** - -- Algorithms can be re-used across multiple SDTM domains. - -- Algorithms are pre-specified for data collection standards in MDR - (if applicable) to facilitate automation. - -- Programming language agnostic - this concept does not rely on a - specific programming language for implementation. The {sdtm.oak} team - implemented them as R functions. - -Here is an example of reusing an algorithm across multiple domains, -variables, and also to a non-standard - -![](reusable_algorithms.jpg){width="600px"} - -## List of Algorithms - -This release of {sdtm.oak} supports the following algorithms: assign_no_ct, assign_ct, hardcode_no_ct, hardcode_ct, assign_datetime, condition_add. Rest of the algorithms will be developed in the subsequent releases. - -The following table provides a brief description of each algorithm. - -```{r echo = FALSE, results = "asis"} -library(knitr) -algorithms <- data.frame( - `Algorithm Name` = c( - "assign_no_ct", - "assign_ct", - "assign_datetime", - "hardcode_ct", - "hardcode_no_ct", - "condition_add", - "ae_aerel", - "dataset_level", - "not_submitted", - "relrec", - "multiple_responses", - "split_to_suppqual", - "remove_dup", - "group_by", - "merge_datasets" - ), - `Description` = c( - paste( - "One-to-one mapping between the raw source and a target", - "SDTM variable that has no controlled terminology restrictions.", - "Just a simple assignment", - "statement." - ), - paste( - "One-to-one mapping between the raw source and a target ", - "SDTM variable that is subject to controlled terminology restrictions.", - "A simple assign statement and applying controlled terminology.", - "This will be used only if the SDTM variable has an associated", - "controlled terminology." - ), - paste( - "One-to-one mapping between the raw source and a target that involves ", - "mapping a Date or time or datetime component. This mapping algorithm", - "also takes care of handling unknown dates and converting them into.", - "ISO8601 format." - ), - paste( - "Mapping a hardcoded value to a target SDTM variable that is subject to terminology restrictions.", - "This will be used only if the SDTM variable has an associated", - "controlled terminology." - ), - paste( - "Mapping a hardcoded value to a target SDTM variable that has no terminology restrictions." - ), - paste( - "Algorithm that is used to filter the source data and/or target domain", - "based on a condition. The mapping will be applied only if the condition is met.", - "The filter can be applied either at the source dataset or at target dataset or both.", - " This algorithm has to be used in conjunction with other algorithms, that is if the", - " condition is met perform the mapping using algorithms like assign_ct,", - "assign_no_ct, hardcode_ct, hardcode_no_ct, assign_datetime." - ), - paste( - "Algorithm that is currently unique to AE.AEREL,", - "particularly when more than one drug is used in the study.
If any collected study drug", - "causalities are 'Yes' then AE.AEREL is Y.
If all collected study", - "drug causalities are 'NA' then AE.AEREL is NA.
If no study drug", - "causalities are 'Yes' but there is at least one causality of 'No'", - "then AE.AEREL is N.
Individual study drug causality responses are", - "stored in AERELn in SUPPAE." - ), - paste( - "Indicates a dataset-level mapping. These mappings will", - "be applied to all SDTM records created from that source.", - "Also called an eCRF-level mappings in eCRF and dataset-level", - "mappings in eDT" - ), - paste( - "Instruction that `{sdtm.oak}` should not map the collected item to SDTM at all." - ), - paste( - "Associate two domains based on the variables in each domain and how those are related.", - "Specifies the name of two domains that are related via RELREC." - ), - paste( - "Consolidate the responses from more than one source variable into one target variable.", - "Used when multiple responses may be given for a single SDTM column.", - "`{sdtm.oak}` will populate all target variable(s) after determining the number of responses provided." - ), - paste( - "Consolidates the responses from more than one", - "source variable into more than one target variable", - "(always a suppqual/non-standard variable).", - "There is no 'parent' target variable that is populated with 'MULTIPLE'." - ), - paste( - "Sub-algorithm at the domain level that indicates some source records may", - "be removed during the `{sdtm.oak}` mapping process if determined to be duplicate records." - ), - paste( - "Sub-algorithm used at the domain level to group source records", - "before mapping to SDTM. This is used in the event we need to collapse data", - "collected across multiple rows into one row in SDTM but it is not a simple", - "un-duplication effort. For example, the way infusion study drug", - "administration data requires us to create 1 SDTM record in EC from 1 or more sources", - "records. When there is more than one source record,", - "we need to take the earliest collected infusion start date (for ECSTDTC) and", - "the latest collected infusion end date within an eCRF instance." - ), - paste( - "To indicate a join condition with a secondary source or multiple sources.", - "Merges are expressed at the domain level only", - "(not at data point or variable level).", - "This is a sub-algorithm and can only be used with algorithm DATASET_LEVEL." - ) - ), - `Example` = c( - paste( - "MH.MHTERM
", - "AE.AETERM" - ), - paste("VS.VSPOS
", "VS.VSLAT"), - paste("MH.MHSTDTC
", "AE.AEENDTC"), - paste( - "MH.MHPRESP = 'Y'
", - "
VS.VSTEST = 'Systolic Blood Pressure'
", - "
VS.VSORRESU = 'mmHg'
" - ), - paste( - "FA.FASCAT = 'COVID-19 PROBABLE CASE'
", - "
CM.CMTRT = 'FLUIDS'" - ), - paste( - "If If MDPRIOR == 1 then CM.CMSTRTPT = 'BEFORE'.
", - "
VS.VSMETHOD when VSTESTCD = 'TEMP'
", - "
If collected value in raw variable DOS is numeric then CM.CMDOSE
", - "
If collected value in raw variable MOD is different to CMTRT then map to CM.CMMODIFY" - ), - paste("For AE.AEREL and AERELn in SUPPAE"), - paste( - "VS = 'Vital Signs'
", - "
MH.MHCAT = 'PROSTATE CANCER HISTORY'
" - ), - paste(""), - paste("BE record related to BS record via RELREC"), - paste( - "AE.AERELNST/ AERELNSn IN SUPPAE

", - "
DM.RACE, if only one value is selected.
", - "DM.RACE = MULTIPLE, if more than one value is selected.
", - "RACEn in SUPPDM where n = 1 to N selected values" - ), - paste( - "If both Filipino and Samoan are checked,", - "CRACE1 will be 'FILIPINO' and CRACE2 will be 'SAMOAN'.
", - "If only Chinese is checked, CRACE1 will be 'CHINESE'." - ), - paste("Remove duplicates on the Vital signs raw dataset based on subject number"), - paste("EC = 'Exposure as Collected'"), - paste( - "Merge AE raw dataset with SAE based on Subject number." - ) - ),stringsAsFactors = TRUE -) -knitr::kable(algorithms) -``` - -## Sub-algorithms - -{sdtm.oak} supports two levels for defining algorithms. For example, there -are some SDTM mappings where a certain action has to be taken only when -a condition is met. In such cases, the primary algorithm checks for the -condition, and the sub-algorithm executes the mappings when the -condition is met. - -Currently, sub-algorithms must be provided for this main algorithms. - -- condition_add -- dataset_level - -Some algorithms can be interchangeably used as algorithms and as -sub-algorithms as seen below (not an exhaustive list) - -![](algo_sub_algo_combo.jpg){width="650px"} - -The permutation & combination of algorithms & sub-algorithms creates -endless possibilities to accommodate different types of mappings. diff --git a/vignettes/articles/cm_raw_data.csv b/vignettes/articles/cm_raw_data.csv deleted file mode 100644 index 10390854..00000000 --- a/vignettes/articles/cm_raw_data.csv +++ /dev/null @@ -1,15 +0,0 @@ -PATNUM,SUBJSTAT,SITENM,INSTANCE,INSTRN,FOLDER,FOLDERL,FOLDERSQ,FORM,FORML,DATAPGID,PGREPNUM,RECORDDT,RECORDID,RECPOS,RECSTAT,MDNUM,MDNUM_RAW,MDREC,MDRAW,MDIND,MDBDR,MDBDTU,MDBTM,MDBTMU,MDPRIOR,MDEDR,MDEDT,MDETM,MDETMU,MDONG,DOS,DOSU,DOSUV,MDFORM,MDRTE,MDFRQ,MDPROPH,TERMID,SRCLN,RAVRFID,MODIFY,CMDRG,CMDRGCD,CMDECOD,CMPNCD,SPLIT,OMIT,ACTTYP,ACTTEXT,CMDICT,CMCLAS,CMCLASCD,CMATC4,CMATC4CD,CMATC3,CMATC3CD,CMATC2,CMATC2CD,CMATC1,CMATC1CD,CLASSNUM -375,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,56379253,0,,111885785,1,N,1,1,No,BABY ASPIRIN,,,1,,1,1,,,,0,1,10,mg,MG,Tablet,PO (Oral),QD (Every Day),0,109576058,20,5652739,BABY ASPIRIN,BABY ASPIRIN,2701701,ACETYLSALICYLIC ACID,2701001,,,,,"WHODRUG GLOBAL B3 MARCH 1, 2021",OTHER AGENTS FOR LOCAL ORAL TREATMENT,A01AD,OTHER AGENTS FOR LOCAL ORAL TREATMENT,A01AD,STOMATOLOGICAL PREPARATIONS,A01A,STOMATOLOGICAL PREPARATIONS,A01,ALIMENTARY TRACT AND METABOLISM,A,1 -375,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,56379253,0,,111969387,2,N,2,2,No,CORTISPORIN,NAUSEA,15-Sep-20,0,,1,0,,,,0,1,50,g,G,Pill,PO (Oral),,0,105820348,28,5533807,CORTISPORIN (UNITED STATES),CORTICOSTEROIDS AND ANTIINFECTIVES IN COMBINATION,90104001001,CORTICOSTEROIDS AND ANTIINFECTIVES IN COMBINATION,90104001001,,,COUNTRY,APPLY SITE COUNTRY TO TERM,"WHODRUG GLOBAL B3 MARCH 1, 2021",CORTICOSTEROIDS AND ANTIINFECTIVES IN COMBINATION,S03CA,CORTICOSTEROIDS AND ANTIINFECTIVES IN COMBINATION,S03CA,CORTICOSTEROIDS AND ANTIINFECTIVES IN COMBINATION,S03C,OPHTHALMOLOGICAL AND OTOLOGICAL PREPARATIONS,S03,SENSORY ORGANS,S,1 -376,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,56407664,0,,111939965,1,N,1,1,No,ASPIRIN,ANEMIA,17-Feb-21,0,8:00,0,0,17-Feb-21,2/17/21,,0,0,,,,,,,0,80619660,8,4297014,ASPIRIN,ASPIRIN [ACETYLSALICYLIC ACID],2701004,ACETYLSALICYLIC ACID,2701001,,,,,"WHODRUG GLOBAL B3 MARCH 1, 2021",OTHER AGENTS FOR LOCAL ORAL TREATMENT,A01AD,OTHER AGENTS FOR LOCAL ORAL TREATMENT,A01AD,STOMATOLOGICAL PREPARATIONS,A01A,STOMATOLOGICAL PREPARATIONS,A01,ALIMENTARY TRACT AND METABOLISM,A,1 -377,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,56408736,0,,111942855,1,N,1,1,No,DIPHENHYDRAMINE HCL,NAUSEA,4-Oct-20,0,9:00,0,0,,,,0,1,50,mg,MG,Capsule,PO (Oral),BID (Twice a Day),0,79751919,3,4240092,DIPHENHYDRAMINE HCL,DIPHENHYDRAMINE HCL,402246,DIPHENHYDRAMINE HYDROCHLORIDE,402001,,,,,"WHODRUG GLOBAL B3 MARCH 1, 2021",AMINOALKYL ETHERS,R06AA,AMINOALKYL ETHERS,R06AA,ANTIHISTAMINES FOR SYSTEMIC USE,R06A,ANTIHISTAMINES FOR SYSTEMIC USE,R06,RESPIRATORY SYSTEM,R,1 -377,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,56408736,0,,129972536,2,N,2,2,No,PARCETEMOL,PYREXIA,20-Jan-20,0,10:00,0,0,20-Jan-20,1/20/20,10:00,0,0,,mg,MG,Capsule,PO (Oral),BID (Twice a Day),1,129972536,2,,,,,,,,,,,,,,,,,,,,,, -377,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,56408736,0,,129972541,3,N,3,3,No,VOMIKIND,VOMITINGS,UN UNK 2019,1,,1,0,UN UNK 2019,6/15/19,,1,0,,Tablet,TABLET,,PO (Oral),PRN (As Needed),1,129972541,3,,,,,,,,,,,,,,,,,,,,,, -377,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,56408736,0,,129972568,4,N,5,5,No,ZENFLOX OZ,DIARHHEA,20 UNK 2019,0,10:00,0,0,20 UNK 2019,6/15/19,,1,0,,mL,ML,Injection,IM (Intramuscular),PRN (As Needed),1,129972568,4,,,,,,,,,,,,,,,,,,,,,, -378,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,63059916,0,,126472439,4,N,4,4,No,AMITRYPTYLINE,COLD,UN UNK 2020,0,,1,1,UN UNK 2020,6/15/20,,1,0,12,g,G,Inhalant,IA (Intra-arterial),QD (Every Day),0,81845879,6,4382628,AMITRIPTYLINE,AMITRIPTYLINE,2201001,AMITRIPTYLINE,2201001,,,SPELLING UPDATE,AMITRIPTYLINE,"WHODRUG GLOBAL B3 MARCH 1, 2021",DRUGS FOR URINARY FREQUENCY AND INCONTINENCE,G04BD,DRUGS FOR URINARY FREQUENCY AND INCONTINENCE,G04BD,UROLOGICALS,G04B,UROLOGICALS,G04,GENITO URINARY SYSTEM AND SEX HORMONES,G,1 -378,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,63059916,0,,126472436,1,N,1,1,No,BENADRYL,FEVER,26-Jan-20,0,9:00,0,0,26-Jan-20,1/26/20,7:00,0,0,100,mg,MG,Capsule,PO (Oral),BID (Twice a Day),1,95547017,4,5211852,BENADRYL (UNITED STATES),BENADRYL [DIPHENHYDRAMINE HYDROCHLORIDE],402002,DIPHENHYDRAMINE HYDROCHLORIDE,402001,,,COUNTRY,APPLY SITE COUNTRY TO TERM,"WHODRUG GLOBAL B3 MARCH 1, 2021",AMINOALKYL ETHERS,R06AA,AMINOALKYL ETHERS,R06AA,ANTIHISTAMINES FOR SYSTEMIC USE,R06A,ANTIHISTAMINES FOR SYSTEMIC USE,R06,RESPIRATORY SYSTEM,R,1 -378,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,63059916,0,,126472437,2,N,2,2,Yes,DIPHENHYDRAMINE HYDROCHLORIDE,LEG PAIN,28-Jan-20,1,,1,1,1-Feb-20,2/1/20,,1,1,100,Capsule,CAPSULE,Capsule,Unknown,QD (Every Day),0,94095723,13,5084095,DIPHENHYDRAMINE HYDROCHLORIDE,DIPHENHYDRAMINE HYDROCHLORIDE,402001,DIPHENHYDRAMINE HYDROCHLORIDE,402001,,,,,"WHODRUG GLOBAL B3 MARCH 1, 2021",AMINOALKYL ETHERS,R06AA,AMINOALKYL ETHERS,R06AA,ANTIHISTAMINES FOR SYSTEMIC USE,R06A,ANTIHISTAMINES FOR SYSTEMIC USE,R06,RESPIRATORY SYSTEM,R,1 -378,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,63059916,0,,126472438,3,N,3,3,Yes,TETRACYCLINE,FEVER,12-Feb-20,0,12:12,0,1,18-Feb-20,2/18/20,,0,0,10,mg,MG,Capsule,DE (Transdermal),BID (Twice a Day),1,84246445,6,4537684,TETRACYCLINE,TETRACYCLINE,1701001,TETRACYCLINE,1701001,,,,,"WHODRUG GLOBAL B3 MARCH 1, 2021",ANTIBIOTICS,S01AA,ANTIBIOTICS,S01AA,ANTIINFECTIVES,S01A,OPHTHALMOLOGICALS,S01,SENSORY ORGANS,S,1 -379,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,63059986,0,,126472565,3,N,1,1,No,BENADRYL,COLD,10 UNK 2020,0,,0,0,20 UNK 2020,6/15/20,,0,0,12,IU,IU,Lotion,IJ (Intra-articular), , ,84734661,12,4567194,BENADRYL (UNITED STATES),BENADRYL [DIPHENHYDRAMINE HYDROCHLORIDE],402002,DIPHENHYDRAMINE HYDROCHLORIDE,402001,,,COUNTRY,APPLY SITE COUNTRY TO TERM,"WHODRUG GLOBAL B3 MARCH 1, 2021",AMINOALKYL ETHERS,R06AA,AMINOALKYL ETHERS,R06AA,ANTIHISTAMINES FOR SYSTEMIC USE,R06A,ANTIHISTAMINES FOR SYSTEMIC USE,R06,RESPIRATORY SYSTEM,R,1 -379,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,63059986,0,,126472563,1,N,2,2,No,SOMINEX,COLD,,1,,1,0,,,,1,1,,mL,ML,Liquid,EP (Epidural),PRN (As Needed),0,82746644,5,4580551,SOMINEX (UNITED STATES),SOMINEX [DIPHENHYDRAMINE HYDROCHLORIDE],402060,DIPHENHYDRAMINE HYDROCHLORIDE,402001,,,COUNTRY,APPLY SITE COUNTRY TO TERM,"WHODRUG GLOBAL B3 MARCH 1, 2021",AMINOALKYL ETHERS,R06AA,AMINOALKYL ETHERS,R06AA,ANTIHISTAMINES FOR SYSTEMIC USE,R06A,ANTIHISTAMINES FOR SYSTEMIC USE,R06,RESPIRATORY SYSTEM,R,1 -379,Randomized,TEST SITE,Concomitant Medications,0,MD,Concomitant Medications,15,MD1,Concomitant Medications,63059986,0,,126472564,2,N,3,3,No,ZQUILL,PAIN,,1,,1,0,17-Feb-20,2/17/20,,1,0,5,%,%,Aerosol,OP (Ophthalmic),Q2H (Every 2 Hours),0,99707553,22,5330876,ZZZQUIL,ZZZQUIL,402326,DIPHENHYDRAMINE HYDROCHLORIDE,402001,,,SPELLING UPDATE,ZZZQUIL,"WHODRUG GLOBAL B3 MARCH 1, 2021",AMINOALKYL ETHERS,R06AA,AMINOALKYL ETHERS,R06AA,ANTIHISTAMINES FOR SYSTEMIC USE,R06A,ANTIHISTAMINES FOR SYSTEMIC USE,R06,RESPIRATORY SYSTEM,R,1 diff --git a/vignettes/articles/dm.csv b/vignettes/articles/dm.csv deleted file mode 100644 index 5d58d228..00000000 --- a/vignettes/articles/dm.csv +++ /dev/null @@ -1,6 +0,0 @@ -"STUDYID","DOMAIN","USUBJID","SUBJID","RFSTDTC","RFENDTC","RFXSTDTC","RFXENDTC","RFICDTC","RFPENDTC","DTHDTC","DTHFL","SITEID","INVID","INVNAM","BRTHDTC","AGE","AGEU","SEX","RACE","ETHNIC","ARMCD","ARM","ACTARMCD","ACTARM","COUNTRY","DMDTC","DMDY","RACE1","RACE2","RACE3" -"test_study","DM","test_study-375","test_study-375","1999-04-14T08:36","2013-01-21","2023-04-14T08:36","2021-01-11T07:50","2007-01-15","2020-04-02","2020-04-02","Y","111111","90009","Dr doctor9",NA,NA,NA,"F","MULTIPLE",NA,NA,NA,NA,NA,"US",NA,NA,"NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER","WHITE",NA -"test_study","DM","test_study-376","test_study-376","2001-03-21","2007-05-21","2020-03-21","2017-09-14T18:49",NA,"2011-12-18","2011-12-18",NA,"111111","90009","Dr doctor9","1981-02-26T18:07",42,"YEARS","M","MULTIPLE","NOT HISPANIC OR LATINO",NA,NA,NA,NA,"US",NA,NA,"BLACK OR AFRICAN AMERICAN","AMERICAN INDIAN OR ALASKA NATIVE","UNKNOWN" -"test_study","DM","test_study-377","test_study-377","1999-03-14","2021-05-05","2020-03-14","2013-08-23T12:37","2015-10-07","2021-05-05","2019-06-29",NA,"111111","90009","Dr doctor9","1968-03-19T04:36",56,"YEARS",NA,"MULTIPLE","NOT REPORTED",NA,NA,NA,NA,"US",NA,NA,"ASIAN","AMERICAN INDIAN OR ALASKA NATIVE","UNKNOWN" -"test_study","DM","test_study-378","test_study-378","2003-02-06T06:33","2021-04-24T09:06","2021-02-06T06:33","2021-04-24T09:06","2018-10-20","2017-04-11","2017-04-11",NA,"111111","90009","Dr doctor9","1979-09-24",45,"YEARS","M","BLACK OR AFRICAN AMERICAN","HISPANIC OR LATINO",NA,NA,NA,NA,"US",NA,NA,NA,NA,NA -"test_study","DM","test_study-379","test_study-379","2003-02-06T06:33","2021-04-24T09:06","2022-02-06T06:33","2021-04-24T09:06","2018-10-20","2017-04-11","2017-04-11","Y","111111","90009","Dr doctor9","1963-09-24",61,"YEARS","M","BLACK OR AFRICAN AMERICAN","HISPANIC OR LATINO",NA,NA,NA,NA,"US",NA,NA,NA,NA,NA diff --git a/vignettes/articles/events_domain.Rmd b/vignettes/articles/events_domain.Rmd deleted file mode 100644 index 9a9156db..00000000 --- a/vignettes/articles/events_domain.Rmd +++ /dev/null @@ -1,572 +0,0 @@ ---- -title: "Creating an Events SDTM domain" -output: - rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Creating an Events SDTM domain} - %\VignetteEncoding{UTF-8} - %\VignetteEngine{knitr::rmarkdown} ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) - -#library(sdtm.oak) -library(admiraldev) -library(rlang) -library(dplyr, warn.conflicts = FALSE) -``` - -```{r, include=FALSE} -devtools::load_all(".") -``` -# Introduction - -This article describes creating an Events SDTM domain using the `sdtm.oak` package. Examples are currently presented and tested in the context of the CM domain. - -# Raw data - -Raw datasets can be exported from the EDC systems in the format they are collected. The example used provides a raw dataset for Concomitant medications, where the collected data is represented as columns for each subject. For example, the Medication Name(MDRAW), Medication Start Date (MDBDR), Start Time (MDBTM), End Date (MDEDR), End time (MDETM), etc. are represented as columns.This format is commonly used in most EDC systems. - -The raw dataset is presented below: - -```{r eval=TRUE, echo=FALSE} -cm_raw <- as_tibble(read.csv("cm_raw_data.csv", stringsAsFactors = FALSE)) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - cm_raw, - display_vars = exprs( - PATNUM, FORML, MDNUM, MDRAW, MDIND, MDBDR, MDBTM, MDPRIOR, MDEDR, - MDETM, MDONG, DOS, DOSU, MDFORM, MDRTE, MDFRQ, MDPROPH - ) -) -``` - -# Programming workflow - -In {sdtm.oak} we process one raw dataset at a time. Similar raw datasets (example Concomitant medications (OID - cm_raw), Targeted Concomitant Medications (OID - cm_t_raw)) can be stacked together before processing. - -* [Read in data](#readdata) -* [Create oak_id_vars](#oakidvars) -* [Read in CT](#readct) -* [Map Topic Variable](#maptopic) -* [Map Rest of the Variables](#maprest) - * [assign_no_ct](#assign_no_ct) - * [assign_ct](#assign_ct) - * [assign_datetime](#assign_datetime) - * [hardcode_ct and condition_add](#hardcode_ct) - * [hardcode_no_ct and condition_add](#hardcode_no_ct) - * [condition_add involving target domain](#condition_add_tar) - * [condition_add involving raw dataset and target domain](#condition_add_raw_tar) -* [Repeat Map Topic and Map Rest](#repeatsteps) - -Repeat the above steps for different raw datasets before proceeding with the below steps. - -* [Create SDTM derived variables](#derivedvars) -* [Add Labels and Attributes](#attributes) - -## Read in data {#readdata} - -Read all the raw datasets into the environment. In this example, the raw dataset name is `cm_raw`. Users can read it from the package using the below code: - -```{r eval=FALSE} -cm_raw <- read.csv(system.file("cm_domain/cm_raw_data.csv", - package = "sdtm.oak")) - -dm <- read.csv(system.file("cm_domain/dm.csv", - package = "sdtm.oak")) -``` - -## Create oak_id_vars {#oakidvars} - -The `oak_id_vars` is a crucial link between the raw datasets and the mapped SDTM domain. As the user derives each SDTM variable, it is merged with the corresponding topic variable using `oak_id_vars`. In {sdtm.oak}, the variables oak_id, raw_source, and patient_number are considered as `oak_id_vars`. These three variables must be added to all raw datasets. They are used in multiple places in the programming. - -oak_id:- Type: numeric- Value: equal to the raw dataframe row number. - -raw_source:- Type: Character- Value: equal to the raw dataset (eCRF) name or eDT dataset name. - -patient_number:- Type: numeric- Value: equal to the subject number in CRF or NonCRF data source. - -```{r eval=TRUE} -cm_raw <- cm_raw %>% - generate_oak_id_vars(pat_var = "PATNUM", - raw_src = "cm_raw") -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - cm_raw, - display_vars = exprs( - oak_id, raw_source, patient_number, PATNUM, FORML, MDNUM, MDRAW - ) -) -``` - -Read in the DM domain - -```{r eval=TRUE} -dm <- as_tibble(read.csv("dm.csv", stringsAsFactors = FALSE)) -``` - - -## Read in CT {#readct} - -Controlled Terminology is part of the SDTM specification and it is prepared by the user. In this example, the study controlled terminology name is `sdtm_ct.csv`. Users can read it from the package using the below code: - -```{r eval=FALSE} -study_ct <- read.csv(system.file("cm_domain/sdtm_ct.csv", - package = "sdtm.oak")) -``` - -```{r eval=TRUE} -study_ct <- as_tibble(read.csv("sdtm_ct.csv", stringsAsFactors = FALSE)) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - study_ct, - display_vars = exprs( - codelist_code, term_code, term_value, collected_value, term_preferred_term, - term_synonyms - ) -) -``` - -## Map Topic Variable {#maptopic} - -The topic variable is mapped as a first step in the mapping process. It is the primary variable in the SDTM domain. The rest of the variables add further definition to the topic variable. In this example, the topic variable is `CMTRT`. It is mapped from the raw dataset column `MDRAW`. The mapping logic is `Map the collected value in the cm_raw dataset MDRAW variable to CM.CMTRT`. - -This mapping does not involve any controlled terminology. The `assign_no_ct` function is used for mapping. Once the topic variable is mapped, the Qualifier, Identifier, and Timing variables can be mapped. - -```{r eval=TRUE} -cm <- - # Map topic variable - assign_no_ct( - raw_dat = cm_raw, - raw_var = "MDRAW", - tgt_var = "CMTRT" - ) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - cm, - display_vars = exprs( - oak_id, raw_source, patient_number, CMTRT - ) -) -``` -## Map Rest of the Variables {#maprest} - -The Qualifiers, Identifiers, and Timing Variables can be mapped in any order. In this example, we will map each variable one by one to demonstrate different mapping algorithms. - -### assign_no_ct {#assign_no_ct} - -The mapping logic for `CMGRPID` is `Map the collected value in the cm_raw dataset MDNUM variable to CM.CMGRPID`. - - -```{r eval=TRUE} -cm <- cm %>% - # Map CMGRPID - assign_no_ct( - raw_dat = cm_raw, - raw_var = "MDNUM", - tgt_var = "CMGRPID", - id_vars = oak_id_vars() - ) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - cm, - display_vars = exprs( - oak_id, raw_source, patient_number, CMTRT, CMGRPID - ) -) -``` - -The CMGRPID is added to the corresponding CMTRT based on the 'oak_id_vars'. When calling the function, the parameter 'id_vars = oak_id_vars()' matches the raw dataset 'oak_id_vars' to the 'oak_id_vars' in the cm domain created in the previous step. It's important to note that the 'oak_id_vars' can be extended to include user-defined variables. But in most cases, the three variables should suffice. - -### assign_ct {#assign_ct} - -The mapping logic for `CMDOSU` is `Map the collected value in the cm_raw dataset DOSU variable to CM.CMDOSU`. The controlled terminology is used to map the collected value to the standard value. `assign_ct` is the right algorithm to perform this mapping. - -```{r eval=TRUE} -cm <- cm %>% - # Map qualifier CMDOSU - assign_ct( - raw_dat = cm_raw, - raw_var = "DOSU", - tgt_var = "CMDOSU", - ct_spec = study_ct, - ct_clst = "C71620", - id_vars = oak_id_vars() - ) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - cm, - display_vars = exprs( - oak_id, raw_source, patient_number, CMTRT, CMGRPID, CMDOSU - ) -) -``` - -### assign_datetime {#assign_datetime} - -The mapping logic for `CMSTDTC` is `Map the collected value in the cm_raw dataset MDBDR (start date) variable and MDBTM (start time) to CM.CMSTDTC`. The collected date value is in the format 'dd mmm yyyy'. The collected time value is in 'H"M' format. The `assign_datetime` function is used to map the collected value in ISO8601 format. - -```{r eval=TRUE} -cm <- cm %>% -# Map CMSTDTC. This function calls create_iso8601 - assign_datetime( - raw_dat = cm_raw, - raw_var = c("MDBDR", "MDBTM"), - tgt_var = "CMSTDTC", - raw_fmt = c(list(c("d-m-y", "dd mmm yyyy")), "H:M"), - raw_unk = c("UN", "UNK"), - id_vars = oak_id_vars() - ) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - cm, - display_vars = exprs( - oak_id, raw_source, patient_number, CMTRT, CMGRPID, CMDOSU, CMSTDTC - ) -) -``` - -### hardcode_ct and condition_add {#hardcode_ct} - -The mapping logic for `CMSTRTPT` is as follows: `If the collected value in the raw variable MDPRIOR and raw dataset cm_raw equals to 1, then CM.CMSTRTPT == 'BEFORE'.` The `hardcode_ct` function is used to map the CMSTRTPT as it involves hardcoding a specific value to an SDTM variable with controlled terminology. The `condition_add` function filters the raw dataset based on a particular condition, and the `hardcode_ct` function performs the mapping. - -When these two functions are used together, the `condition_add` function first filters the raw dataset based on the specified condition. Next, the filtered dataset is then passed to the `hardcode_ct` function to assign the appropriate value. This example illustrates how the `hardcode_ct` algorithm functions as a sub-algorithm to `condition_add`. - -```{r eval=TRUE} -cm <- cm %>% - # Map qualifier CMSTRTPT Annotation text is If MDPRIOR == 1 then CM.CMSTRTPT = 'BEFORE' - hardcode_ct( - raw_dat = condition_add(cm_raw, MDPRIOR == "1"), - raw_var = "MDPRIOR", - tgt_var = "CMSTRTPT", - tgt_val = "BEFORE", - ct_spec = study_ct, - ct_clst = "C66728", - id_vars = oak_id_vars() - ) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - cm, - display_vars = exprs( - oak_id, raw_source, patient_number, CMTRT, CMGRPID, CMDOSU, CMSTDTC, CMSTRTPT - ) -) -``` - -The `condition_add` function adds additional metadata to the records in the raw dataset that meets the condition. Refer to the function documentation for more details. `hardcode_ct` function uses the addtional metadata to find the records that meet the criteria and map them accordingly. - -### hardcode_no_ct and condition_add {#hardcode_no_ct} - -The mapping logic for `CMSTTPT` is as follows: `If the collected value in the raw variable MDPRIOR and raw dataset cm_raw equals to 1, then CM.CMSTTPT == 'SCREENING'.` The `hardcode_no_ct` function is used to map the CMSTTPT as it involves hardcoding a specific value to an SDTM variable without controlled terminology. The `condition_add` function filters the raw dataset based on a particular condition, and the `hardcode_no_ct` function performs the mapping. - -```{r eval=TRUE} -cm <- cm %>% - # Map qualifier CMSTTPT Annotation text is If MDPRIOR == 1 then CM.CMSTTPT = 'SCREENING' - hardcode_no_ct( - raw_dat = condition_add(cm_raw, MDPRIOR == "1"), - raw_var = "MDPRIOR", - tgt_var = "CMSTTPT", - tgt_val = "SCREENING", - id_vars = oak_id_vars() - ) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - cm, - display_vars = exprs( - oak_id, raw_source, patient_number, CMTRT, CMGRPID, CMDOSU, CMSTDTC, CMSTRTPT, CMSTTPT - ) -) -``` - -### condition_add involving target domain {#condition_add_tar} - -In the mapping for `CMSTRTPT` and `CMSTTTPT`, the `condition_add` function is used in the raw dataset. In this mapping, we can explore how to use `condition_add` to add a filter condition based on the target SDTM variable. - -The mapping logic for `CMDOSFRQ` is `If CMTRT is not null, then map the collected value in raw dataset cm_raw and raw variable MDFRQ to CMDOSFRQ.` This may or may not represent a valid SDTM mapping in an actual study, but it can be used as an example. - -In this mapping, the `condition_add` function filters the cm domain created in the previous step and adds metadata to the records where it meets the condition. The `assign_ct` function uses the additional metadata to find the records that meet the criteria and map them accordingly. - -```{r eval=TRUE} -cm <- cm %>% - # Map qualifier CMDOSFRQ Annotation text is If CMTRT is not null then map the collected value in raw dataset cm_raw and raw variable MDFRQ to CMDOSFRQ - {assign_ct( - raw_dat = cm_raw, - raw_var = "MDFRQ", - tgt_dat = condition_add(. , !is.na(CMTRT)), - tgt_var = "CMDOSFRQ", - ct_spec = study_ct, - ct_clst = "C66728", - id_vars = oak_id_vars() - )} -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - cm, - display_vars = exprs( - oak_id, raw_source, patient_number, CMTRT, CMGRPID, CMDOSU, CMSTDTC, CMSTRTPT, CMSTTPT, CMDOSFRQ - ) -) -``` - -Remember to use additional curly braces in the function call when using the `condition_add` function on the target dataset. This is necessary because the input target dataset is represented as a `.` and is passed on from the previous step using the {magrittr} pipe operator. Currently, there is a limitation when using a nested function call with `.` to reference one of the input parameters, and this [recommended approach](https://magrittr.tidyverse.org/reference/pipe.html#using-the-dot-for-secondary-purposes) will overcome that. - -The placeholder `.` is for use with {magrittr} pipe `%>%` operator. We encourage using `.` and {magrittr} pipe `%>%` operator when using {sdtm.oak} functions. - -Another way to achieve the same outcome is by moving the 'condition_by' call up one level, as illustrated below: it is not required to use the {magrittr} pipe `%>%` or curly braces in this case. - -```{r eval=FALSE} -cm <- cm %>% - condition_add(!is.na(CMTRT)) %>% - assign_ct( - raw_dat = cm_raw, - raw_var = "DOSU", - tgt_var = "CMDOSU", - ct_spec = study_ct, - ct_clst = "C71620", - id_vars = oak_id_vars() - ) -``` - -### condition_add involving raw dataset and target domain {#condition_add_raw_tar} - -In this mapping, we can explore how to use `condition_add` to add a filter condition based on the target SDTM variable. - -The mapping logic for `CMMODIFY` is `If collected value in MODIFY in cm_raw is different to CM.CMTRT then assign the collected value to CMMODIFY in CM domain (CM.CMMODIFY)`. The `assign_no_ct` function is used to map the CMMODIFY as it involves mapping the collected value to the SDTM variable without controlled terminology. The `condition_add` function filters the raw dataset & target dataset based on a particular condition, and the `assign_no_ct` function performs the mapping. - -```{r eval=TRUE} -cm <- cm %>% - # Map CMMODIFY Annotation text If collected value in MODIFY in cm_raw is different to CM.CMTRT then assign the collected value to CMMODIFY in CM domain (CM.CMMODIFY) - {assign_no_ct( - raw_dat = cm_raw, - raw_var = "MODIFY", - tgt_dat = condition_add(. , MODIFY != CMTRT, .dat2 = cm_raw), - tgt_var = "CMMODIFY", - id_vars = oak_id_vars() - )} -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - cm, - display_vars = exprs( - oak_id, raw_source, patient_number, CMTRT, CMGRPID, CMDOSU, CMSTDTC, CMSTRTPT, CMSTTPT, CMDOSFRQ, CMMODIFY - ) -) -``` - -Another way to achieve the same outcome is by moving the 'condition_by' call up one level, as illustrated below: it is not required to use the {magrittr} pipe `%>%` or curly braces in this case. - -```{r eval=FALSE} -cm <- cm %>% - condition_add(MODIFY != CMTRT, .dat2 = cm_raw) %>% - assign_no_ct( - raw_dat = cm_raw, - raw_var = "MODIFY", - tgt_var = "CMMODIFY", - id_vars = oak_id_vars() - ) -``` - -Now, complete mapping the rest of the SDTM variables. - -```{r eval=TRUE} -cm <- cm %>% - # Map CMINDC as the collected value in MDIND to CM.CMINDC - assign_no_ct( - raw_dat = cm_raw, - raw_var = "MDIND", - tgt_var = "CMINDC", - id_vars = oak_id_vars() - ) %>% - # Map CMENDTC as the collected value in MDEDR and MDETM to CM.CMENDTC. - # This function calls create_iso8601 - assign_datetime( - raw_dat = cm_raw, - raw_var = c("MDEDR", "MDETM"), - tgt_var = "CMENDTC", - raw_fmt = c("d-m-y", "H:M"), - raw_unk = c("UN", "UNK") - ) %>% - # Map qualifier CMENRTPT as If MDONG == 1 then CM.CMENRTPT = 'ONGOING' - hardcode_ct( - raw_dat = condition_add(cm_raw, MDONG == "1"), - raw_var = "MDONG", - tgt_var = "CMENRTPT", - tgt_val = "ONGOING", - ct_spec = study_ct, - ct_clst = "C66728", - id_vars = oak_id_vars() - ) %>% - # Map qualifier CMENTPT as If MDONG == 1 then CM.CMENTPT = 'DATE OF LAST ASSESSMENT' - hardcode_no_ct( - raw_dat = condition_add(cm_raw, MDONG == "1"), - raw_var = "MDONG", - tgt_var = "CMENTPT", - tgt_val = "DATE OF LAST ASSESSMENT", - id_vars = oak_id_vars() - ) %>% - # Map qualifier CMDOS as If collected value in raw_var DOS is numeric then CM.CMDOSE - assign_no_ct( - raw_dat = condition_add(cm_raw, is.numeric(DOS)), - raw_var = "DOS", - tgt_var = "CMDOS", - id_vars = oak_id_vars() - ) %>% - # Map qualifier CMDOS as If collected value in raw_var DOS is character then CM.CMDOSTXT - assign_no_ct( - raw_dat = condition_add(cm_raw, is.character(DOS)), - raw_var = "DOS", - tgt_var = "CMDOSTXT", - id_vars = oak_id_vars() - ) %>% - # Map qualifier CMDOSU as the collected value in the cm_raw dataset DOSU variable to CM.CMDOSU - assign_ct( - raw_dat = cm_raw, - raw_var = "DOSU", - tgt_var = "CMDOSU", - ct_spec = study_ct, - ct_clst = "C71620", - id_vars = oak_id_vars() - ) %>% - # Map qualifier CMDOSFRM as the collected value in the cm_raw dataset MDFORM variable to CM.CMDOSFRM - assign_ct( - raw_dat = cm_raw, - raw_var = "MDFORM", - tgt_var = "CMDOSFRM", - ct_spec = study_ct, - ct_clst = "C66726", - id_vars = oak_id_vars() - ) %>% - # Map CMROUTE as the collected value in the cm_raw dataset MDRTE variable to CM.CMROUTE - assign_ct( - raw_dat = cm_raw, - raw_var = "MDRTE", - tgt_var = "CMROUTE", - ct_spec = study_ct, - ct_clst = "C66729", - id_vars = oak_id_vars() - ) %>% - # Map qualifier CMPROPH as If MDPROPH == 1 then CM.CMPROPH = 'Y' - hardcode_ct( - raw_dat = condition_add(cm_raw, MDPROPH == "1"), - raw_var = "MDPROPH", - tgt_var = "CMPROPH", - tgt_val = "Y", - ct_spec = study_ct, - ct_clst = "C66742", - id_vars = oak_id_vars() - ) %>% - # Map CMDRG as the collected value in the cm_raw dataset CMDRG variable to CM.CMDRG - assign_no_ct( - raw_dat = cm_raw, - raw_var = "CMDRG", - tgt_var = "CMDRG", - id_vars = oak_id_vars() - ) %>% - # Map CMDRGCD as the collected value in the cm_raw dataset CMDRGCD variable to CM.CMDRGCD - assign_no_ct( - raw_dat = cm_raw, - raw_var = "CMDRGCD", - tgt_var = "CMDRGCD", - id_vars = oak_id_vars() - ) %>% - # Map CMDECOD as the collected value in the cm_raw dataset CMDECOD variable to CM.CMDECOD - assign_no_ct( - raw_dat = cm_raw, - raw_var = "CMDECOD", - tgt_var = "CMDECOD", - id_vars = oak_id_vars() - ) %>% - # Map CMPNCD as the collected value in the cm_raw dataset CMPNCD variable to CM.CMPNCD - assign_no_ct( - raw_dat = cm_raw, - raw_var = "CMPNCD", - tgt_var = "CMPNCD", - id_vars = oak_id_vars() - ) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - cm, - display_vars = exprs( - oak_id, raw_source, patient_number, CMTRT, CMGRPID, CMDOSU, CMSTDTC, CMSTRTPT, CMSTTPT, CMDOSFRQ, CMMODIFY, CMINDC, CMENDTC, CMENRTPT, CMENTPT, CMDOS, CMDOSTXT, CMDOSU, CMDOSFRM, CMROUTE, CMPROPH, CMDRG, CMDRGCD, CMDECOD, CMPNCD - ) -) -``` - -## Repeat Map Topic and Map Rest {#repeatsteps} - -There is only one topic variable in this raw data source, and there are no additional topic variable mappings. Users can proceed to the next step. This is required only if there is more than one topic variable to map. - -## Create SDTM derived variables {#derivedvars} - -The SDTM derived variables or any SDTM mapping that is applicable to all the records in the `cm` dataset produced in the previous step cam be created now. In this example, we will create the `CMSEQ` variable. The mapping logic is `Create a sequence number for each record in the CM domain`. - -```{r eval=TRUE} - cm <- cm %>% - # The below mappings are applicable to all the records in the cm domain, - # hence can be derived using mutate statement. - dplyr::mutate( - STUDYID = "test_study", - DOMAIN = "CM", - CMCAT = "GENERAL CONMED", - USUBJID = paste0("test_study", "-", cm_raw$PATNUM) - ) %>% - # derive sequence number - # derive_seq(tgt_var = "CMSEQ", - # rec_vars= c("USUBJID", "CMGRPID")) %>% - derive_study_day( - sdtm_in = ., - dm_domain = dm, - tgdt = "CMENDTC", - refdt = "RFXSTDTC", - study_day_var = "CMENDY" - ) %>% - derive_study_day( - sdtm_in = ., - dm_domain = dm, - tgdt = "CMSTDTC", - refdt = "RFXSTDTC", - study_day_var = "CMSTDY" - ) %>% - # Add code for derive Baseline flag. - dplyr::select("STUDYID", "DOMAIN" , "USUBJID", everything()) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - cm, - display_vars = exprs( - oak_id, raw_source, patient_number, STUDYID, DOMAIN, USUBJID, CMGRPID, CMTRT, CMDOSU, CMSTDTC, CMSTRTPT, CMSTTPT, CMDOSFRQ, CMMODIFY, CMINDC, CMENDTC, CMENRTPT, CMENTPT, CMDOS, CMDOSTXT, CMDOSU, CMDOSFRM, CMROUTE, CMPROPH, CMDRG, CMDRGCD, CMDECOD, CMPNCD, CMSTDY, CMENDY - ) -) -``` - - -## Add Labels and Attributes {#attributes} - -Yet to be developed. diff --git a/vignettes/articles/findings_domain.Rmd b/vignettes/articles/findings_domain.Rmd deleted file mode 100644 index bf9ed5ae..00000000 --- a/vignettes/articles/findings_domain.Rmd +++ /dev/null @@ -1,564 +0,0 @@ ---- -title: "Creating an Findings SDTM domain" -output: - rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Creating an Findings SDTM domain} - %\VignetteEncoding{UTF-8} - %\VignetteEngine{knitr::rmarkdown} ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) - -#library(sdtm.oak) -library(admiraldev) -library(rlang) -library(dplyr, warn.conflicts = FALSE) -``` - -```{r, include=FALSE} -devtools::load_all(".") -``` -# Introduction - -This article describes how to create a Findings SDTM domain using the {sdtm.oak} package. Examples are currently presented and tested in the context of the VS domain. - -Before reading this article, it is recommended that users review the "Creating an Events Domain" article, which provides a detailed explanation of various concepts in {sdtm.oak}, such as `oak_id_vars`, `condition_add`, etc. It also offers guidance on which mapping algorithms or functions to use for different mappings and provides a more detailed explanation of how these mapping algorithms or functions work. - -In this article, we will dive directly into programming and provide further explanation only where it is required. - -# Programming workflow - -In {sdtm.oak} we process one raw dataset at a time. Similar raw datasets (example Concomitant medications (OID - cm_raw), Targeted Concomitant Medications (OID - cm_t_raw)) can be stacked together before processing. - -* [Read in data](#readdata) -* [Create oak_id_vars](#oakidvars) -* [Read in CT](#readct) -* [Map Topic Variable](#maptopic) -* [Map Rest of the Variables](#maprest) -* [Repeat Map Topic and Map Rest](#repeatsteps) - -Repeat the above steps for different raw datasets before proceeding with the below steps. - -* [Create SDTM derived variables](#derivedvars) -* [Add Labels and Attributes](#attributes) - -## Read in data {#readdata} - -Read all the raw datasets into the environment. In this example, the raw dataset name is `cm_raw`. Users can read it from the package using the below code: - -```{r eval=FALSE} -vs_raw <- read.csv(system.file("cm_domain/vitals_raw_data.csv", - package = "sdtm.oak")) - -dm <- read.csv(system.file("cm_domain/dm.csv", - package = "sdtm.oak")) -``` - -```{r eval=TRUE, echo=FALSE} -vs_raw <- as_tibble(read.csv("vitals_raw_data.csv", stringsAsFactors = FALSE)) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - vs_raw, - display_vars = exprs( - PATNUM, FORML, ASMNTDN, TMPTC, VTLD, VTLTM, SUBPOS, SYS_BP, DIA_BP, - PULSE, RESPRT, TEMP, TEMPLOC, OXY_SAT, LAT, LOC - ) -) -``` - -## Create oak_id_vars {#oakidvars} - -```{r eval=TRUE} -vs_raw <- vs_raw %>% - generate_oak_id_vars(pat_var = "PATNUM", - raw_src = "vitals") -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - vs_raw, - display_vars = exprs( - oak_id, raw_source, patient_number, PATNUM, FORML, SYS_BP, DIA_BP - ) -) -``` - -Read in the DM domain - -```{r eval=TRUE, echo=FALSE} -dm <- as_tibble(read.csv("dm.csv", stringsAsFactors = FALSE)) -``` - - -## Read in CT {#readct} - -Controlled Terminology is part of the SDTM specification and it is prepared by the user. In this example, the study controlled terminology name is `sdtm_ct.csv`. Users can read it from the package using the below code: - -```{r eval=FALSE} -study_ct <- read.csv(system.file("cm_domain/sdtm_ct.csv", - package = "sdtm.oak")) -``` - -```{r eval=TRUE, echo=FALSE} -study_ct <- as_tibble(read.csv("sdtm_ct.csv", stringsAsFactors = FALSE)) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - study_ct, - display_vars = exprs( - codelist_code, term_code, term_value, collected_value, term_preferred_term, - term_synonyms - ) -) -``` - -## Map Topic Variable {#maptopic} - -This raw dataset has multiple topic variables. Lets start with the first topic variable. Map topic variable SYSBP from the raw variable SYS_BP. - -```{r eval=TRUE} -# Map topic variable SYSBP and its qualifiers. -vs_sysbp <- - hardcode_ct( - raw_dat = vs_raw, - raw_var = "SYS_BP", - tgt_var = "VSTESTCD", - tgt_val = "SYSBP", - ct_spec = study_ct, - ct_clst = "C66741" - ) %>% - # Filter for records where VSTESTCD is not empty. - # Only these records need qualifier mappings. - dplyr::filter(!is.na(.data$VSTESTCD)) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - vs_sysbp, - display_vars = exprs( - oak_id, raw_source, patient_number, VSTESTCD - ) -) -``` - -## Map Rest of the Variables {#maprest} - -Map rest of the variables applicable to the topic variable SYSBP. This can include qualifiers, identifier and timing variables. - -```{r eval=TRUE} -# Map topic variable SYSBP and its qualifiers. -vs_sysbp <- vs_sysbp %>% - # Map VSTEST using hardcode_ct algorithm - hardcode_ct( - raw_dat = vs_raw, - raw_var = "SYS_BP", - tgt_var = "VSTEST", - tgt_val = "Systolic Blood Pressure", - ct_spec = study_ct, - ct_clst = "C67153", - id_vars = oak_id_vars() - ) %>% - # Map VSORRES using assign_no_ct algorithm - assign_no_ct( - raw_dat = vs_raw, - raw_var = "SYS_BP", - tgt_var = "VSORRES", - id_vars = oak_id_vars() - ) %>% - # Map VSORRESU using hardcode_ct algorithm - hardcode_ct( - raw_dat = vs_raw, - raw_var = "SYS_BP", - tgt_var = "VSORRESU", - tgt_val = "mmHg", - ct_spec = study_ct, - ct_clst = "C66770", - id_vars = oak_id_vars() - ) %>% - # Map VSPOS using assign_ct algorithm - assign_ct( - raw_dat = vs_raw, - raw_var = "SUBPOS", - tgt_var = "VSPOS", - ct_spec = study_ct, - ct_clst = "C71148", - id_vars = oak_id_vars() - ) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - vs_sysbp, - display_vars = exprs( - oak_id, raw_source, patient_number, VSTESTCD, VSTEST, VSORRES, VSORRESU, VSPOS - ) -) -``` - -## Repeat Map Topic and Map Rest {#repeatsteps} - -This raw data source has other topic variables DIABP, PULSE, RESP, TEMP, OXYSAT, VSALL and its corresponding qualifiers. Repeat mapping topic and qualifiers for each topic variable. - -```{r eval=TRUE} -# Map topic variable DIABP and its qualifiers. -vs_diabp <- - hardcode_ct( - raw_dat = vs_raw, - raw_var = "DIA_BP", - tgt_var = "VSTESTCD", - tgt_val = "DIABP", - ct_spec = study_ct, - ct_clst = "C66741" - ) %>% - dplyr::filter(!is.na(.data$VSTESTCD)) %>% - # Map VSTEST using hardcode_ct algorithm - hardcode_ct( - raw_dat = vs_raw, - raw_var = "DIA_BP", - tgt_var = "VSTEST", - tgt_val = "Diastolic Blood Pressure", - ct_spec = study_ct, - ct_clst = "C67153", - id_vars = oak_id_vars() - ) %>% - # Map VSORRES using assign_no_ct algorithm - assign_no_ct( - raw_dat = vs_raw, - raw_var = "DIA_BP", - tgt_var = "VSORRES", - id_vars = oak_id_vars() - ) %>% - # Map VSORRESU using hardcode_ct algorithm - hardcode_ct( - raw_dat = vs_raw, - raw_var = "DIA_BP", - tgt_var = "VSORRESU", - tgt_val = "mmHg", - ct_spec = study_ct, - ct_clst = "C66770", - id_vars = oak_id_vars() - ) %>% - # Map VSPOS using assign_ct algorithm - assign_ct( - raw_dat = vs_raw, - raw_var = "SUBPOS", - tgt_var = "VSPOS", - ct_spec = study_ct, - ct_clst = "C71148", - id_vars = oak_id_vars() - ) - -# Map topic variable PULSE and its qualifiers. -vs_pulse <- - hardcode_ct( - raw_dat = vs_raw, - raw_var = "PULSE", - tgt_var = "VSTESTCD", - tgt_val = "PULSE", - ct_spec = study_ct, - ct_clst = "C66741" - ) %>% - dplyr::filter(!is.na(.data$VSTESTCD)) %>% - # Map VSTEST using hardcode_ct algorithm - hardcode_ct( - raw_dat = vs_raw, - raw_var = "PULSE", - tgt_var = "VSTEST", - tgt_val = "Pulse Rate", - ct_spec = study_ct, - ct_clst = "C67153", - id_vars = oak_id_vars() - ) %>% - # Map VSORRES using assign_no_ct algorithm - assign_no_ct( - raw_dat = vs_raw, - raw_var = "PULSE", - tgt_var = "VSORRES", - id_vars = oak_id_vars() - ) %>% - # Map VSORRESU using hardcode_ct algorithm - hardcode_ct( - raw_dat = vs_raw, - raw_var = "PULSE", - tgt_var = "VSORRESU", - tgt_val = "beats/min", - ct_spec = study_ct, - ct_clst = "C66770", - id_vars = oak_id_vars() - ) - -# Map topic variable RESP from the raw variable RESPRT and its qualifiers. -vs_resp <- - hardcode_ct( - raw_dat = vs_raw, - raw_var = "RESPRT", - tgt_var = "VSTESTCD", - tgt_val = "RESP", - ct_spec = study_ct, - ct_clst = "C66741" - ) %>% - dplyr::filter(!is.na(.data$VSTESTCD)) %>% - # Map VSTEST using hardcode_ct algorithm - hardcode_ct( - raw_dat = vs_raw, - raw_var = "RESPRT", - tgt_var = "VSTEST", - tgt_val = "Respiratory Rate", - ct_spec = study_ct, - ct_clst = "C67153", - id_vars = oak_id_vars() - ) %>% - # Map VSORRES using assign_no_ct algorithm - assign_no_ct( - raw_dat = vs_raw, - raw_var = "RESPRT", - tgt_var = "VSORRES", - id_vars = oak_id_vars() - ) %>% - # Map VSORRESU using hardcode_ct algorithm - hardcode_ct( - raw_dat = vs_raw, - raw_var = "RESPRT", - tgt_var = "VSORRESU", - tgt_val = "breaths/min", - ct_spec = study_ct, - ct_clst = "C66770", - id_vars = oak_id_vars() - ) - -# Map topic variable TEMP from raw variable TEMP and its qualifiers. -vs_temp <- - hardcode_ct( - raw_dat = vs_raw, - raw_var = "TEMP", - tgt_var = "VSTESTCD", - tgt_val = "TEMP", - ct_spec = study_ct, - ct_clst = "C66741" - ) %>% - dplyr::filter(!is.na(.data$VSTESTCD)) %>% - # Map VSTEST using hardcode_ct algorithm - hardcode_ct( - raw_dat = vs_raw, - raw_var = "TEMP", - tgt_var = "VSTEST", - tgt_val = "Temperature", - ct_spec = study_ct, - ct_clst = "C67153", - id_vars = oak_id_vars() - ) %>% - # Map VSORRES using assign_no_ct algorithm - assign_no_ct( - raw_dat = vs_raw, - raw_var = "TEMP", - tgt_var = "VSORRES", - id_vars = oak_id_vars() - ) %>% - # Map VSORRESU using hardcode_ct algorithm - hardcode_ct( - raw_dat = vs_raw, - raw_var = "TEMP", - tgt_var = "VSORRESU", - tgt_val = "C", - ct_spec = study_ct, - ct_clst = "C66770", - id_vars = oak_id_vars() - ) %>% - # Map VSLOC from TEMPLOC using assign_ct - assign_ct( - raw_dat = vs_raw, - raw_var = "TEMPLOC", - tgt_var = "VSLOC", - ct_spec = study_ct, - ct_clst = "C74456", - id_vars = oak_id_vars() - ) - -# Map topic variable OXYSAT from raw variable OXY_SAT and its qualifiers. -vs_oxysat <- - hardcode_ct( - raw_dat = vs_raw, - raw_var = "OXY_SAT", - tgt_var = "VSTESTCD", - tgt_val = "OXYSAT", - ct_spec = study_ct, - ct_clst = "C66741" - ) %>% - dplyr::filter(!is.na(.data$VSTESTCD)) %>% - # Map VSTEST using hardcode_ct algorithm - hardcode_ct( - raw_dat = vs_raw, - raw_var = "OXY_SAT", - tgt_var = "VSTEST", - tgt_val = "Oxygen Saturation", - ct_spec = study_ct, - ct_clst = "C67153", - id_vars = oak_id_vars() - ) %>% - # Map VSORRES using assign_no_ct algorithm - assign_no_ct( - raw_dat = vs_raw, - raw_var = "OXY_SAT", - tgt_var = "VSORRES", - id_vars = oak_id_vars() - ) %>% - # Map VSORRESU using hardcode_ct algorithm - hardcode_ct( - raw_dat = vs_raw, - raw_var = "OXY_SAT", - tgt_var = "VSORRESU", - tgt_val = "%", - ct_spec = study_ct, - ct_clst = "C66770", - id_vars = oak_id_vars() - ) %>% - # Map VSLAT using assign_ct from raw variable LAT - assign_ct( - raw_dat = vs_raw, - raw_var = "LAT", - tgt_var = "VSLAT", - ct_spec = study_ct, - ct_clst = "C99073", - id_vars = oak_id_vars() - ) %>% - # Map VSLOC using assign_ct from raw variable LOC - assign_ct( - raw_dat = vs_raw, - raw_var = "LOC", - tgt_var = "VSLOC", - ct_spec = study_ct, - ct_clst = "C74456", - id_vars = oak_id_vars() - ) - -# Map topic variable VSALL from raw variable ASMNTDN with the logic if ASMNTDN == 1 then VSTESTCD = VSALL -vs_vsall <- - hardcode_ct( - raw_dat = condition_add(vs_raw, ASMNTDN == 1), - raw_var = "ASMNTDN", - tgt_var = "VSTESTCD", - tgt_val = "VSALL", - ct_spec = study_ct, - ct_clst = "C66741" - ) %>% - dplyr::filter(!is.na(.data$VSTESTCD)) %>% - # Map VSTEST using hardcode_ct algorithm - hardcode_ct( - raw_dat = vs_raw, - raw_var = "ASMNTDN", - tgt_var = "VSTEST", - tgt_val = "Vital Signs", - ct_spec = study_ct, - ct_clst = "C67153", - id_vars = oak_id_vars() - ) -``` - -Now that all the topic variable and its qualifier mappings are complete, combine all the datasets and proceed with mapping qualifiers, identifiers and timing variables appicable to all topic variables. - -```{r, eval=TRUE} -# Combine all the topic variables into a single data frame and map qualifiers -# applicable to all topic variables -vs <- dplyr::bind_rows(vs_vsall, vs_sysbp, vs_diabp, vs_pulse, vs_resp, - vs_temp, vs_oxysat) %>% - # Map qualifiers common to all topic variables - # Map VSDTC using assign_ct algorithm - assign_datetime( - raw_dat = vs_raw, - raw_var = c("VTLD", "VTLTM"), - tgt_var = "VSDTC", - raw_fmt = c(list(c("d-m-y", "dd-mmm-yyyy")), "H:M") - ) %>% - # Map VSTPT from TMPTC using assign_ct - assign_ct( - raw_dat = vs_raw, - raw_var = "TMPTC", - tgt_var = "VSTPT", - ct_spec = study_ct, - ct_clst = "TPT", - id_vars = oak_id_vars() - ) %>% - # Map VSTPTNUM from TMPTC using assign_ct - assign_ct( - raw_dat = vs_raw, - raw_var = "TMPTC", - tgt_var = "VSTPTNUM", - ct_spec = study_ct, - ct_clst = "TPTNUM", - id_vars = oak_id_vars() - ) %>% - # Map VISIT from INSTANCE using assign_ct - assign_ct( - raw_dat = vs_raw, - raw_var = "INSTANCE", - tgt_var = "VISIT", - ct_spec = study_ct, - ct_clst = "VISIT", - id_vars = oak_id_vars() - ) %>% - # Map VISITNUM from INSTANCE using assign_ct - assign_ct( - raw_dat = vs_raw, - raw_var = "INSTANCE", - tgt_var = "VISITNUM", - ct_spec = study_ct, - ct_clst = "VISITNUM", - id_vars = oak_id_vars() - ) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - vs, - display_vars = exprs( - oak_id, raw_source, patient_number, VSTESTCD, VSTEST, VSORRES, VSORRESU, VSPOS, - VSLAT, VSDTC, VSTPT, VSTPTNUM, VISIT, VISITNUM - ) -) -``` - -## Create SDTM derived variables {#derivedvars} - -Create derived variables applicable to all topic variables. - -```{r eval=TRUE} -vs <- vs %>% - dplyr::mutate( - STUDYID = "test_study", - DOMAIN = "VS", - VSCAT = "VITAL SIGNS", - USUBJID = paste0("test_study", "-", .data$patient_number) - ) %>% - # derive_seq(tgt_var = "VSSEQ", - # rec_vars= c("USUBJID", "CMTRT")) %>% - derive_study_day( - sdtm_in = ., - dm_domain = dm, - tgdt = "VSDTC", - refdt = "RFXSTDTC", - study_day_var = "VSDY" - ) %>% - dplyr::select("STUDYID", "DOMAIN", "USUBJID", everything()) -``` - -```{r, eval=TRUE, echo=FALSE} -dataset_oak_vignette( - vs, - display_vars = exprs( - STUDYID, DOMAIN, USUBJID, VSTESTCD, VSTEST, VSORRES, VSORRESU, VSPOS, - VSLAT, VSTPT, VSTPTNUM, VISIT, VISITNUM, VSDTC, VSDY - ) -) -``` - -## Add Labels and Attributes {#attributes} - -Yet to be devleoped. diff --git a/vignettes/articles/iso_8601.Rmd b/vignettes/articles/iso_8601.Rmd deleted file mode 100644 index 222e5d01..00000000 --- a/vignettes/articles/iso_8601.Rmd +++ /dev/null @@ -1,254 +0,0 @@ ---- -title: "Converting dates, times or date-times to ISO 8601" ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -library(sdtm.oak) -``` - -An SDTM DTC variable may include data that is represented in [ISO -8601](https://en.wikipedia.org/wiki/ISO_8601) format as a complete date/time, a -partial date/time, or an incomplete date/time. `{sdtm.oak}` provides the -`create_iso8601()` function that allows flexible mapping of date and time -values in various formats to a single date-time ISO 8601 format. - -## Introduction - -To perform conversion to the ISO 8601 format you need to pass two key arguments: - -- At least one vector of dates, times, or date-times of `character` type; -- A date/time format via the `.format` parameter that instructs `create_iso8601()` on which date/time components to expect. - -```{r} -create_iso8601("2000 01 05", .format = "y m d") -create_iso8601("22:35:05", .format = "H:M:S") -``` - -By default the `.format` parameter understands a few reserved characters: - -- `"y"` for year -- `"m"` for month -- `"d"` for day -- `"H"` for hours -- `"M"` for minutes -- `"S"` for seconds - -Besides character vectors of dates and times, you may also pass a single vector -of date-times, provided you adjust the format: - -```{r} -create_iso8601("2000-01-05 22:35:05", .format = "y-m-d H:M:S") -``` - -## Multiple inputs - -If you have dates and times in separate vectors then you will need to pass -a format for each vector: - -```{r} -create_iso8601("2000-01-05", "22:35:05", .format = c("y-m-d", "H:M:S")) -``` - -In addition, like most R functions that take vectors as input, -`create_iso8601()` is vectorized: - -```{r} -date <- c("2000-01-05", "2001-12-25", "1980-06-18", "1979-09-07") -time <- c("00:12:21", "22:35:05", "03:00:15", "07:09:00") -create_iso8601(date, time, .format = c("y-m-d", "H:M:S")) -``` - -But the number of elements in each of the inputs has to match or you will get an -error: - -```{r} -date <- c("2000-01-05", "2001-12-25", "1980-06-18", "1979-09-07") -time <- "00:12:21" -try(create_iso8601(date, time, .format = c("y-m-d", "H:M:S"))) -``` - -You can combine individual date and time components coming -in as separate inputs; here is a contrived example of year, month and day -together, hour, and minute: - -```{r} -year <- c("99", "84", "00", "80", "79", "1944", "1953") -month_and_day <- c("jan 1", "apr 04", "mar 06", "jun 18", "sep 07", "sep 13", "sep 14") -hour <- c("12", "13", "05", "23", "16", "16", "19") -min <- c("0", "60", "59", "42", "44", "10", "13") -create_iso8601(year, month_and_day, hour, min, .format = c("y", "m d", "H", "M")) -``` - -The `.format` argument must be always named; otherwise, it will be treated as if -it were one of the inputs and interpreted as missing. - -```{r} -try(create_iso8601("2000-01-05", "y-m-d")) -``` - - -## Format variations - -The `.format` parameter can easily accommodate variations in the format of the -inputs: - -```{r} -create_iso8601("2000-01-05", .format = "y-m-d") -create_iso8601("2000 01 05", .format = "y m d") -create_iso8601("2000/01/05", .format = "y/m/d") -``` - -Individual components may come in a different order, so adjust the format -accordingly: - -```{r} -create_iso8601("2000 01 05", .format = "y m d") -create_iso8601("05 01 2000", .format = "d m y") -create_iso8601("01 05, 2000", .format = "m d, y") -``` - -All other individual characters given in the format are taken strictly, e.g. -the number of spaces matters: - -```{r} -date <- c("2000 01 05", "2000 01 05", "2000 01 05", "2000 01 05") -create_iso8601(date, .format = "y m d") -create_iso8601(date, .format = "y m d") -create_iso8601(date, .format = "y m d") -create_iso8601(date, .format = "y m d") -``` - -The format can include regular expressions though: - -```{r} -create_iso8601(date, .format = "y\\s+m\\s+d") -``` - -By default, a streak of the reserved characters is treated as if only one was -provided, so these formats are equivalent: - -```{r} -date <- c("2000-01-05", "2001-12-25", "1980-06-18", "1979-09-07") -time <- c("00:12:21", "22:35:05", "03:00:15", "07:09:00") -create_iso8601(date, time, .format = c("y-m-d", "H:M:S")) -create_iso8601(date, time, .format = c("yyyy-mm-dd", "HH:MM:SS")) -create_iso8601(date, time, .format = c("yyyyyyyy-m-dddddd", "H:MMMMM:SSSS")) -``` - -## Multiple alternative formats - -When an input vector contains values with varying formats, a single format may -not be adequate to encompass all variations. In such situations, it's advisable -to list multiple alternative formats. This approach ensures that each format is -tried sequentially until one matches the data in the vector. - -```{r} -date <- c("2000/01/01", "2000-01-02", "2000 01 03", "2000/01/04") -create_iso8601(date, .format = "y-m-d") -create_iso8601(date, .format = "y m d") -create_iso8601(date, .format = "y/m/d") -create_iso8601(date, .format = list(c("y-m-d", "y m d", "y/m/d"))) -``` - -Consider the order in which you supply the formats, as it can be significant. If -multiple formats could potentially match, the sequence determines which format -is applied first. - -```{r} -create_iso8601("07 04 2000", .format = list(c("d m y", "m d y"))) -create_iso8601("07 04 2000", .format = list(c("m d y", "d m y"))) -``` - -Note that if you are passing alternative formats, then the `.format` argument -must be a list whose length matches the number of inputs. - -## Parsing of date or time components - -By default, date or time components are parsed as follows: - -- year: either parsed from a two- or four-digit year; -- month: either as a numeric month (single or two-digit number) or as an English abbreviated month name (e.g. Jan, Jun or Dec) regardless of case; -- month day: are parsed from two-digit numbers; -- hour and minute: are parsed from single or two-digit numbers; -- second: is parsed from single or two-digit numbers with an optional fractional part. - -```{r} -# Years: two-digit or four-digit numbers. -years <- c("0", "1", "00", "01", "15", "30", "50", "68", "69", "80", "99") -create_iso8601(years, .format = "y") - -# Adjust the point where two-digits years are mapped to 2000's or 1900's. -create_iso8601(years, .format = "y", .cutoff_2000 = 20L) - -# Both numeric months (two-digit only) and abbreviated months work out of the box -months <- c("0", "00", "1", "01", "Jan", "jan") -create_iso8601(months, .format = "m") - -# Month days: single or two-digit numbers, anything else results in NA. -create_iso8601(c("1", "01", "001", "10", "20", "31"), .format = "d") - -# Hours -create_iso8601(c("1", "01", "001", "10", "20", "31"), .format = "H") - -# Minutes -create_iso8601(c("1", "01", "001", "10", "20", "60"), .format = "M") - -# Seconds -create_iso8601(c("1", "01", "23.04", "001", "10", "20", "60"), .format = "S") -``` - -## Allowing alternative date or time values - -If date or time component values include special values, e.g. values -encoding missing values, then you can indicate those values as possible -alternatives such that the parsing will tolerate them; use the `.na` argument: - -```{r} -create_iso8601("U DEC 2019 14:00", .format = "d m y H:M") -create_iso8601("U DEC 2019 14:00", .format = "d m y H:M", .na = "U") - -create_iso8601("U UNK 2019 14:00", .format = "d m y H:M") -create_iso8601("U UNK 2019 14:00", .format = "d m y H:M", .na = c("U", "UNK")) -``` - -In this case you could achieve the same result using regexps: - -```{r} -create_iso8601("U UNK 2019 14:00", .format = "(d|U) (m|UNK) y H:M") -``` - - -## Changing reserved format characters - -There might be cases when the reserved characters --- `"y"`, `"m"`, `"d"`, -`"H"`, `"M"`, `"S"` --- might get in the way of specifying an adequate format. -For example, you might be tempted to use format `"HHMM"` to try to parse a time -such as `"14H00M"`. You could assume that the first "H" codes for parsing the -hour, and the second "H" to be a literal "H" but, actually, `"HH"` will be taken -to mean parsing hours, and `"MM"` to parse minutes. You can use the function -`fmt_cmp()` to specify alternative format regexps for the format, replacing the -default characters. - -In the next example, we reassign new format strings for the hour and minute -components, thus freeing the `"H"` and `"M"` patterns from being interpreted as -hours and minutes, and to be taken literally: - -```{r} -create_iso8601("14H00M", .format = "HHMM") -create_iso8601("14H00M", .format = "xHwM", .fmt_c = fmt_cmp(hour = "x", min = "w")) -``` -Note that you need to make sure that the format component regexps are mutually -exclusive, i.e. they don't have overlapping matches; otherwise -`create_iso8601()` will fail with an error. In the next example both months and -minutes could be represented by an `"m"` in the format resulting in an ambiguous -format specification. - -```{r} -fmt_cmp(hour = "h", min = "m") -try(create_iso8601("14H00M", .format = "hHmM", .fmt_c = fmt_cmp(hour = "h", min = "m"))) -``` - diff --git a/vignettes/articles/just-dm.Rmd b/vignettes/articles/just-dm.Rmd new file mode 100644 index 00000000..9c57e6bf --- /dev/null +++ b/vignettes/articles/just-dm.Rmd @@ -0,0 +1,24 @@ +--- +title: "Just read csv" +output: + rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Just read csv} + %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) + +library(sdtm.oak) +``` + +```{r eval=TRUE, echo=FALSE} +dm <- system.file("cm_domain/dm.csv", package = "sdtm.oak") |> + read.csv() +nrow(dm) +``` diff --git a/vignettes/articles/sdtm_ct.csv b/vignettes/articles/sdtm_ct.csv deleted file mode 100644 index d078d6ed..00000000 --- a/vignettes/articles/sdtm_ct.csv +++ /dev/null @@ -1,75 +0,0 @@ -codelist_code,term_code,term_value,collected_value,term_preferred_term,term_synonyms -C66726,C25158,CAPSULE,Capsule,Capsule Dosage Form,cap -C66726,C25394,PILL,Pill,Pill Dosage Form, -C66726,C29167,LOTION,Lotion,Lotion Dosage Form, -C66726,C42887,AEROSOL,Aerosol,Aerosol Dosage Form,aer -C66726,C42944,INHALANT,Inhalant,Inhalant Dosage Form, -C66726,C42946,INJECTION,Injection,Injectable Dosage Form, -C66726,C42953,LIQUID,Liquid,Liquid Dosage Form, -C66726,C42998,TABLET,Tablet,Tablet Dosage Form,tab -C66728,C25629,BEFORE,Prior,Prior, -C66728,C53279,ONGOING,Continue,Continue,Continuous -C66729,C28161,INTRAMUSCULAR,IM (Intramuscular),Intramuscular Route of Administration, -C66729,C38210,EPIDURAL,EP (Epidural),Epidural Route of Administration, -C66729,C38222,INTRA-ARTERIAL,IA (Intra-arterial),Intraarterial Route of Administration, -C66729,C38223,INTRA-ARTICULAR,IJ (Intra-articular),Intraarticular Route of Administration, -C66729,C38287,OPHTHALMIC,OP (Ophthalmic),Ophthalmic Route of Administration, -C66729,C38288,ORAL,PO (Oral),Oral Route of Administration,Intraoral Route of Administration; PO -C66729,C38305,TRANSDERMAL,DE (Transdermal),Transdermal Route of Administration, -C66729,C38311,UNKNOWN,Unknown,Unknown Route of Administration, -C66734,C49568,CM,Concomitant Medication Domain,Concomitant Medication Domain,Concomitant/Prior Medications -C66741,C174446,TEMP,Body Temperature,Body Temperature,Body Temperature; Temperature -C66741,C25298,SYSBP,Systolic Blood Pressure,Systolic Blood Pressure,Systolic Blood Pressure -C66741,C25299,DIABP,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure -C66741,C49676,PULSE,Pulse Rate,Pulse Rate,Pulse Rate -C66741,C49678,RESP,Respiratory Rate,Respiratory Rate,Respiratory Rate -C66741,C60832,OXYSAT,Oxygen Saturation Measurement,Oxygen Saturation Measurement,Oxygen Saturation -C66741,V00224,VSALL,VS Domain ALL Tests,VS Domain ALL Tests, -C66742,C49488,Y,Yes,Yes,Yes -C66770,C25613,%,Percentage,Percentage,Percentage -C66770,C42559,C,Degree Celsius,Degree Celsius,Degree Celsius -C66770,C49670,mmHg,Millimeter of Mercury,Millimeter of Mercury,Millimeter of Mercury -C66770,C49673,beats/min,Beats per Minute,Beats per Minute,Beats per Minute; BPM; bpm -C66770,C49674,breaths/min,Breaths per Minute,Breaths per Minute,Breaths per Minute -C66789,C49484,NOT DONE,Not Done,Not Done, -C67153,C174446,Temperature,Body Temperature,Body Temperature,Body Temperature; Temperature -C67153,C25298,Systolic Blood Pressure,Systolic Blood Pressure,Systolic Blood Pressure,Systolic Blood Pressure -C67153,C25299,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure,Diastolic Blood Pressure -C67153,C49676,Pulse Rate,Pulse Rate,Pulse Rate,Pulse Rate -C67153,C49678,Respiratory Rate,Respiratory Rate,Respiratory Rate,Respiratory Rate -C67153,C60832,Oxygen Saturation,Oxygen Saturation Measurement,Oxygen Saturation Measurement,Oxygen Saturation -C67153,V00224,Vital Signs,VS Domain ALL Tests,VS Domain ALL Tests, -C71113,C25473,QD,QD (Every Day),Daily,/day; Daily; Per Day -C71113,C64496,BID,BID (Twice a Day),Twice Daily,BD; Twice per day -C71113,C64499,PRN,PRN (As Needed),As Needed,As needed -C71113,C64516,Q2H,Q2H (Every 2 Hours),Every Two Hours,Every 2 hours -C71113,C64530,QID,QID (4 Times a Day),Four Times Daily,4 times per day -C71148,C111310,SEMI-RECUMBENT,Semi-Supine,Semi-Supine,Semi-Supine -C71148,C62122,SITTING,Sitting,Sitting,Sitting -C71148,C62165,PRONE,Prone,Prone,Prone -C71148,C62166,STANDING,Standing,Standing,Orthostatic; Standing -C71148,C62167,SUPINE,Supine,Supine,Supine -C71620,C25613,%,%,Percentage,Percentage -C71620,C28253,mg,mg,Milligram,Milligram -C71620,C28254,mL,mL,Milliliter,cm3; Milliliter -C71620,C48155,g,g,Gram,Gram -C71620,C48480,CAPSULE,Capsule,Capsule Dosing Unit,cap; Capsule Dosing Unit -C71620,C48542,TABLET,Tablet,Tablet Dosing Unit,tab; Tablet Dosing Unit -C71620,C48579,IU,IU,International Unit,IE; International Unit -C74456,C12390,RECTUM,Rectum,Rectum, -C74456,C12421,ORAL CAVITY,Oral Cavity,Oral Cavity,Buccal cavity; Mouth -C74456,C12470,SKIN,Skin,Skin,Integument; Skin -C74456,C12502,TYMPANIC MEMBRANE,Tympanic Membrane,Tympanic Membrane,Tympanic Membrane -C74456,C12674,AXILLA,Axilla,Axilla,Armpit; Axilla -C74456,C32608,FINGER,FINGER,FINGER,Finger -C74456,C89803,FOREHEAD,Forehead,Forehead,Forehead -C99073,C25228,RIGHT,Right,Right, -C99073,C25229,LEFT,Left,Left, -TPT,TPT,PREDOSE,Pre-dose,, -TPT,TPT,POSTDOSE,Post-dose,, -TPTNUM,TPTNUM,1,Pre-dose,, -TPTNUM,TPTNUM,2,Post-dose,, -VISITNUM,VISITNUM,1,Screening,, -VISITNUM,VISITNUM,2,Visit 1,, -VISIT,VISIT,SCREENING,Screening,, -VISIT,VISIT,VISIT 1,Visit 1,, \ No newline at end of file diff --git a/vignettes/articles/study_sdtm_spec.Rmd b/vignettes/articles/study_sdtm_spec.Rmd deleted file mode 100644 index 3f4715d2..00000000 --- a/vignettes/articles/study_sdtm_spec.Rmd +++ /dev/null @@ -1,361 +0,0 @@ ---- -title: "All about Metadata" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{All about Metadata} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- -```{css, echo = FALSE} -#pkgdown-sidebar{ - display:none; -} -.wide .table{ - font-size: 8px; - overflow: visible -} - -``` - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -options(rmarkdown.html_vignette.check_title = FALSE) -``` - -{sdtm.oak} is a metadata-driven solution that is designed to be Electronic Data -Capture (EDC) and standards agnostic. Throughout this article, the term "metadata" -is used several times. In this context, "metadata" refers to the specific -metadata used by {sdtm.oak}. This article aims to provide users with a more -detailed understanding of the {sdtm.oak} metadata. - -In general, metadata can be defined as "data about data." It does not include -any patient-level data. Instead, the metadata provides a blueprint of the data -that needs to be collected during a study. - -## Standards Metadata - -The standards metadata used in {sdtm.oak} is sourced from the CDISC Library or -sponsor MDR or any other form of documentation where standards are maintained. -This metadata provides information on the following: - -- The relationship between Data Collection Standards (eCRF & eDT), SDTM mapping, -and Controlled Terminology -- Machine-readable standard SDTM mappings -- Algorithms and associated metadata required for the SDTM automation of -standards in the study. - -In the upcoming releases of {sdtm.oak}, we will effectively utilize the -standards metadata and customize it to meet the study requirements. - -## Study Definition Metadata - -Study Definition Metadata is also referred to as Study Metadata. Study Definition -Metadata provides information about the eCRF and eDT data collected in the study. - -**eCRF Metadata** -The eCRF Design Metadata is fetched from the EDC system. This Metadata includes - -- Forms Metadata: Identifier, eCRF label, Repeating format and other properties of the eCRF. - -- Fields Metadata: Identifier, question label, datatype, and other -properties of data collection fields in the study. - -- Data Dictionaries: Identifier and the controlled terms collected at the -source. - -- Visits: Name of the visits as defined in the EDC. - -**eDT Metadata** - -eDT Metadata is the blueprint metadata that describes the data collected as part of that external data transfer (from clinical sites to the sponsor). This includes - -- Dataset name, label, repeating properties, etc. - -- Variable name, datatype, label and associated codelist, etc. - - -## Study SDTM Mappings Metadata (specifications) - -Study SDTM mappings metadata is the study SDTM specification. To develop the SDTM domains, {sdtm.oak} requires the user to prepare the Study SDTM mappings metadata. Unlike the conventional SDTM specification, which includes one tab per domain defining the target (SDTM domain, Variables) to source (raw dataset, raw variables) and SDTM mappings, the SDTM spec for {sdtm.oak} defines the source-to-target relationship. For each source, the SDTM mapping, algorithms, and associated metadata are defined. The table below presents the columns in the SDTM mapping specification and its explanation. - -
-```{r echo = FALSE, results = "asis"} -library(knitr) -definition <- data.frame( - Variable_Name = c( - "study_number", - "raw_source_model", - "raw_dataset", - "raw_dataset_ordinal", - "raw_dataset_label", - "raw_variable", - "raw_variable_label", - "raw_variable_ordinal", - "raw_variable_type", - "raw_data_format", - "raw_codelist", - "study_specific", - "annotation_ordinal", - "mapping_is_dataset", - "annotation_text", - "target_domain", - "target_sdtm_variable", - "target_sdtm_variable_role", - "target_sdtm_variable_codelist_code", - paste( - "target_sdtm_variable_", - "controlled_terms_or_format" - ), - "target_sdtm_variable_ordinal", - "origin", - "mapping_algorithm", - "sub_algorithm", - "target_hardcoded_value", - "target_term_value", - "target_term_code", - "condition_ordinal", - "condition_group_ordinal", - "condition_left_raw_dataset", - "condition_left_raw_variable", - "condition_left_sdtm_domain", - "condition_left_sdtm_variable", - "condition_operator", - "condition_right_text_value", - "condition_right_sdtm_domain", - "condition_right_sdtm_variable", - "condition_right_raw_dataset", - "condition_right_raw_variable", - "condition_next_logical_operator", - "merge_type", - "merge_left", - "merge_right", - "merge_condition", - "unduplicate_keys", - "groupby_keys", - "target_resource_dataset", - "target_resource_variable" - ), - `Description_of_the_variable` = c( - "Study Number", - "Data Collection model", - "Name of the raw or source dataset", - "Ordinal of the raw dataset as defined in EDC or eDT specification", - "Label of the raw or source dataset", - "Name of the raw variable", - "Label of the raw variable", - paste( - "Ordinal of the variable as defined in the eCRF or", - "eDT specification" - ), - "Type of the Raw Variable", - "Data format of the raw variable", - paste( - "Dictionary name which is assigned to the ", - "eCRF field or a eDT variable" - ), - paste( - "`TRUE` indicates that the source is study specific. ", - "`FALSE` indicates that the raw variable is part of data standards" - ), - "Ordinal of the SDTM mappings for the particular raw source", - paste( - "Indicates if the SDTM mapping is at the dataset level. ", - "`TRUE` indicates that it is dataset level mapping." - ), - "SDTM mapping text or annotation text", - "Name of the target domain.", - "Name of the target SDTM variable", - "CDISC Role for the SDTM target variable defined in the annotation.", - paste( - "NCI or sponsor code of the codelist assigned to the ", - "SDTM target variable defined in the annotation." - ), - paste( - "Controlled terms or format for the target variable ", - "defined in the annotation (as defined per CDISC).", - "`target_sdtm_variable_controlled_terms_or_format` is required ", - "for SDTM Define.xml" - ), - "Ordinal of the target SDTM variable", - "Origin of metadata source, values are subject to controlled terminology", - "Mapping Algorithm", - "The sub-algorithm (scenario) of the source-to-target mapping", - "Text (Hardcoded value) that applies to the target.", - paste( - "CDISC Submission value or sponsor value which represents a", - "hardcoded text" - ), - paste( - "NCI code or sponsor code of the hardcoded value" - ), - paste( - "Ordinal of a (sub)condition, increasing when there ", - "are more than one sub-conditions (e.g. X AND Y)" - ), - paste( - "Ordinal of a group of sub-conditions, used to ", - "disambiguate complex conditions such as (A AND B) OR C. ", - "The ordinal increases in each group and gives the final ", - "precedence of the logical operators." - ), - "Name of the raw dataset on the left part of the condition", - "Name of the raw variable on the left part of the condition", - "Name of the SDTM variable used in the left part of the condition.", - paste( - "Name of the SDTM domain of the variable that is used in ", - "the left part of the condition." - ), - "Operator between the left and right part of the condition", - paste( - "A text that applies to the right part of the condition as ", - "indicated per `condition_operator`." - ), - "Name of the SDTM variable used in the right part of the condition.", - paste( - "Name of the SDTM domain of the variable that is used ", - "in the right part of the condition." - ), - "Name of the raw dataset on the right part of the condition", - "Name of the raw variable on the right part of the condition", - paste( - "The logical operator that applies to the next ", - "sub-conditions, typically AND, OR" - ), - "Specifies the type of join", - "Specifies the left component of the merge", - "Specifies the right component of the merge", - paste( - "Specify the condition of the join (e.g. a specific ", - "variable that should match in the components of the merge)" - ), - paste( - "Raw variables that should be used to determine whether ", - "an observation in the source data is a duplicate record and ", - "subject to being removed" - ), - paste( - "Raw Variables or aggregation functions (i.e. earliest, ", - "latest) to group source data records before mapping to SDTM" - ), - paste( - "Raw dataset name of the raw variable. This will be used when ", - " values are assigned from a from a different source", - "other than the source the mapping is associated with" - ), - paste( - "Raw variable name. This will be used when ", - "values are assigned from a from a different source", - "other than the source the mapping is associated with" - ) - ), - Example_Values = c( - "test_study", - "e-CRF or eDT", - "VTLS1, DEM", - "1, 2, 3, etc", - "Vital Signs,
Demographics", - "SEX_001,
BRTHDD", - "Systolic Blood Pressure,
Birth Day", - "1, 2, 3, etc", - "Text Box,
Date control", - "$200,
dd MON YYYY", - "SEX, ETHNIC", - "TRUE, FALSE", - "1, 2, 3, etc", - "TRUE, FALSE", - "VS.VSORRES when VSTESTCD = 'SYSBP'", - "VS, MH", - "VSORRES, MHSTDTC", - "Topic Variable,
Grouping Qualifier,
Identifier Variable", - "C66742
C66790", - "(AGEU)
ISO 8601
(SEX)", - "1, 2, 3", - "Derived,
Assigned,
Collected,
Predecessor", - "DATASET_LEVEL
ASSIGN_CT
AE_AEREL
HARDCODE_CT", - "ASSIGN_NO_CT
HARCODE_CT", - "ALZHEIMER'S DISEASE HISTORY", - "Y,
beats/min,
INFORMED CONSENT OBTAINED", - "C49488", - "1, 2, 3", - "1, 2, 3", - "VTLS1", - "POSITION", - "AE", - "AEENRTPT", - paste( - "Checked
Not_checked
Is_null
Is_not_null", - "
Equals_to
Different_to
is_numeric
in", - "
not_in" - ), - "('Not Recovered/Not Resolved','Recovering/Resolving')
HOSPITALIZATION", - "AE", - "AETERM", - "SMKHX", - "SUNAM", - "and, or", - "left_join
right_join
full_join
visit_join
subject_join", - "VTLS1", - "VACREC", - "VTLS1.SUBJECT = VACREC.SUBJECT,
MD1.MDNUM = VACREC.MDNUM", - "VTLS1.SUBJECT,
VTLS1.DATAPAGEID", - "TXINF1.DATAPGID,
Earliest", - "AEDE", - "DATAPAGEID" - ), - Association_with_mapping_Algorithms = c( - "Generic Use", - "Generic Use", - "Required for all mapping algorithms", - "Generic Use", - "Generic Use", - "Generic Use", - "Generic Use", - "Generic Use", - "Required for all mapping algorithms", - "Required for all mapping algorithms", - "Required for all mapping algorithms", - "Generic Use", - "Required for all mapping algorithms", - "Required for all mapping algorithms", - "Generic Use", - "Required for all mapping algorithms", - "Required for all mapping algorithms", - "Required for all mapping algorithms", - "Required for all mapping algorithms", - "Generic Use", - "Required for all mapping algorithms", - "Used for define.xml", - "Required for all mapping algorithms", - "Only when Mapping Algorithm is
IF_THEN_ELSE
DATASET_LEVEL", - "ASSIGN_NO_CT
HARDCODE_NO_CT", - "HARDCODE_CT", - "IF_THEN_ELSE", - "IF_THEN_ELSE", - "IF_THEN_ELSE", - "IF_THEN_ELSE", - "IF_THEN_ELSE", - "IF_THEN_ELSE", - "IF_THEN_ELSE", - "IF_THEN_ELSE", - "IF_THEN_ELSE", - "IF_THEN_ELSE", - "IF_THEN_ELSE", - "IF_THEN_ELSE", - "IF_THEN_ELSE", - "IF_THEN_ELSE", - "MERGE", - "MERGE", - "MERGE", - "MERGE", - "REMOVE_DUP", - "GROUP_BY", - "ASSIGN_NO_CT", - "ASSIGN_NO_CT" - ), - stringsAsFactors = TRUE -) -knitr::kable(definition) -``` diff --git a/vignettes/articles/vitals_raw_data.csv b/vignettes/articles/vitals_raw_data.csv deleted file mode 100644 index 8b164c22..00000000 --- a/vignettes/articles/vitals_raw_data.csv +++ /dev/null @@ -1,7 +0,0 @@ -STUDY,PATNUM,SUBJSTAT,SITENM,INSTANCE,FORM,FORML,DATAPGID,RECORDID,RECPOS,ASMNTDN,TMPTC,VTLD,VTLTM,SUBPOS,SYS_BP,DIA_BP,PULSE,RESPRT,TEMP,TEMPLOC,OXY_SAT,LAT,LOC,VSO2SRC,NEWS107 -Test Study,375,Randomized,Test Study,VISIT1,VTLS1,Vital Signs,1752329,5734754,0,0,Pre-dose,16-May-15,7:25,PRONE,158,92,63,17,40.48,SKIN,98,RIGHT,FINGER,MASK OXYGEN THERAPY,UNRESPONSIVE -Test Study,375,Randomized,Test Study,VISIT1,VTLS1,Vital Signs,8153061,3712412,1,0,Post-dose,16-May-15,10:25,SEMI-RECUMBENT,94,78,76,20,36.75,TYMPANIC MEMBRANE,99,LEFT,FINGER,ROOM AIR,NEW CONFUSION -Test Study,375,Randomized,Test Study,Screening,VTLS1,Vital Signs,3463516,1229594,0,0,,6-May-18,2:01,PRONE,117,62,66,15,29.45,ORAL CAVITY,96,LEFT,FINGER,ROOM AIR,VERBAL RESPONSIVE -Test Study,376,Randomized,Test Study,Screening,VTLS1,Vital Signs,8423253,9767053,0,1,,,,,,,,,,,,,,, -Test Study,376,Randomized,Test Study,VISIT1,VTLS1,Vital Signs,1211365,1567778,0,0,Pre-dose,23-Oct-08,1:19,PRONE,85,68,73,21,38.25,AXILLA,93,RIGHT,FINGER,ROOM AIR,ALERT -Test Study,376,Randomized,Test Study,VISIT1,VTLS1,Vital Signs,5880552,7060998,0,0,Post-dose,23-Oct-08,3:19,PRONE,126,81,56,18,38.08,TYMPANIC MEMBRANE,93,LEFT,FINGER,MASK OXYGEN THERAPY,PAIN RESPONSIVE \ No newline at end of file