From cfdd7da9837afde6347ff46ecac283f60881b2b8 Mon Sep 17 00:00:00 2001
From: Michael Aydinbas
Date: Sat, 23 Dec 2023 23:31:30 +0100
Subject: [PATCH 1/8] wip: adapt first script to use future and furrr for
 parallelizing main loop

---
 .Rprofile                               |   2 +-
 DESCRIPTION                             |   4 +-
 NAMESPACE                               |   6 +
 R/helper_main.R                         |   3 +
 R/helper_scipt_1.R                      | 154 +++++++++++++++++++
 man/process_patient_data.Rd             |  25 ++++
 man/process_product_data.Rd             |  25 ++++
 man/process_tracker_file.Rd             |  18 +++
 renv.lock                               | 151 +++++++++++++++----
 scripts/run_script_1_extract_raw_data.R | 189 ++++--------------------
 10 files changed, 381 insertions(+), 196 deletions(-)
 create mode 100644 R/helper_scipt_1.R
 create mode 100644 man/process_patient_data.Rd
 create mode 100644 man/process_product_data.Rd
 create mode 100644 man/process_tracker_file.Rd

diff --git a/.Rprofile b/.Rprofile
index b3b3274..eec9766 100644
--- a/.Rprofile
+++ b/.Rprofile
@@ -3,5 +3,5 @@ source("renv/activate.R")
 if (interactive()) {
     require("devtools", quietly = TRUE)
     # automatically attaches usethis
-    devtools::load_all()
+    devtools::install()
 }
diff --git a/DESCRIPTION b/DESCRIPTION
index 4be403b..a1d7052 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -25,7 +25,9 @@ Imports:
     tidyr,
     yaml,
     zoo,
-    arrow
+    arrow,
+    furrr,
+    progressr
 Suggests:
     testthat (>= 3.0.0)
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index 8e7d6d3..f8e779f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -14,9 +14,15 @@ export(fix_id)
 export(fix_sex)
 export(fix_t1d_diagnosis_age)
 export(fix_testing_frequency)
+export(get_files)
+export(get_synonyms)
 export(get_tracker_year)
 export(harmonize_patient_data_columns)
 export(id_2_county_hospisal)
+export(init_paths)
+export(process_patient_data)
+export(process_product_data)
+export(process_tracker_file)
 export(read_cleaned_patient_data)
 export(read_column_synonyms)
 export(report_empty_intersections)
diff --git a/R/helper_main.R b/R/helper_main.R
index 50c9383..ca8d1a3 100644
--- a/R/helper_main.R
+++ b/R/helper_main.R
@@ -12,6 +12,7 @@
 #' @param delete If TRUE, delete all files under output.
 #'
 #' @return A list with tracker_root_path and output_root path
+#' @export
 init_paths <- function(names, output_dir_name = "output", delete = FALSE) {
     paths <- list()
     tracker_root_path <- select_A4D_directory()
@@ -57,6 +58,7 @@ init_paths <- function(names, output_dir_name = "output", delete = FALSE) {
 #' @param pattern The search pattern to filter files.
 #'
 #' @return A vector with file names.
+#' @export
 get_files <- function(tracker_root, pattern = "\\.xlsx$") {
     tracker_files <- list.files(path = tracker_root, recursive = T, pattern = pattern)
     tracker_files <-
@@ -70,6 +72,7 @@ get_files <- function(tracker_root, pattern = "\\.xlsx$") {
 #' Read in all defined synonyms from the YAML files inside the synonyms folder.
 #'
 #' @return A list with both patient and product data synonyms as tibble.
+#' @export
 get_synonyms <- function() {
     ## Extract synonyms for products and patients
     ## If you encounter new columns, just add the synonyms to these YAML files
diff --git a/R/helper_scipt_1.R b/R/helper_scipt_1.R
new file mode 100644
index 0000000..222d5f1
--- /dev/null
+++ b/R/helper_scipt_1.R
@@ -0,0 +1,154 @@
+
+
+#' @title Process a single tracker file and extract patient and product data.
+#'
+#' @param tracker_file Filename of the tracker.
+#' @param paths A list with the paths to the tracker root dir, the patient and product output dir and the root output dir.
+#' @param p Progressor from the progressr package.
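+#'
+#' @examples
+#' \dontrun{
+#' # Illustrative sketch of the intended call pattern, assuming A4D_DATA_ROOT
+#' # is set so that init_paths()/get_files() can resolve the tracker directory:
+#' paths <- init_paths(c("patient_data_raw", "product_data_raw"))
+#' files <- get_files(paths$tracker_root)
+#' progressr::with_progress({
+#'     p <- progressr::progressor(steps = length(files))
+#'     process_tracker_file(files[[1]], paths = paths, p = p)
+#' })
+#' }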
+#' +#' @export +process_tracker_file <- function(tracker_file, paths, p) { + p() + tracker_name <- tools::file_path_sans_ext(basename(tracker_file)) + synonyms <- get_synonyms() + tracker_data_file <- + file.path(paths$tracker_root, tracker_file) + logDebug("Start process_tracker_file.") + logInfo( + "Current file: ", + tracker_name + ) + + logfile <- paste0(tracker_name, "_", "patient") + with_file_logger(logfile, + { + tryCatch( + process_patient_data( + tracker_name = tracker_name, + tracker_data_file = tracker_data_file, + output_root = paths$patient_data_raw, + synonyms_patient = synonyms$patient + ), + error = function(e) { + logError("Could not process patient data. Error = ", e$message, ".") + }, + warning = function(w) { + logWarn("Could not process patient data. Warning = ", w$message, ".") + } + ) + }, + output_root = paths$output_root + ) + + logfile <- paste0(tracker_name, "_", "product") + + with_file_logger(logfile, + { + tryCatch( + process_product_data( + tracker_name = tracker_name, + tracker_data_file = tracker_data_file, + output_root = paths$product_data_raw, + synonyms_product = synonyms$product + ), + error = function(e) { + logError("Could not process product data. Error = ", e$message, ".") + }, + warning = function(w) { + logWarn("Could not process product data. Warning = ", w$message, ".") + } + ) + }, + output_root = paths$output_root + ) + + logInfo("Finish process_tracker_file.") +} + + +#' @title Extract patient data. +#' +#' @param tracker_name Filename without extension. +#' @param tracker_data_file Filename of the tracker. +#' @param output_root Directory for storing extracted patient data. +#' @param synonyms_patient Synonyms for patient data header names. +#' +#' @export +process_patient_data <- + function(tracker_name, + tracker_data_file, + output_root, + synonyms_patient) { + logDebug("Start process_patient_data.") + + df_raw_patient <- + reading_patient_data( + tracker_data_file = tracker_data_file, + columns_synonyms = synonyms_patient + ) + + df_raw_patient <- df_raw_patient %>% dplyr::mutate(file_name = tracker_name) + + logDebug( + "df_raw_patient dim: ", + dim(df_raw_patient) %>% as.data.frame(), + "." + ) + + export_data_as_parquet( + data = df_raw_patient, + filename = tracker_name, + output_root = output_root, + suffix = "_patient_raw" + ) + + logInfo("Finish process_patient_data.") + } + + +#' @title Extract product data. +#' +#' @param tracker_name Filename without extension. +#' @param tracker_data_file Filename of the tracker. +#' @param output_root Directory for storing extracted product data. +#' @param synonyms_product Synonyms for product data header names. +#' +#' @export +process_product_data <- + function(tracker_name, + tracker_data_file, + output_root, + synonyms_product) { + logDebug("Start process_product_data.") + + df_raw_product <- + reading_product_data_step1( + tracker_data_file = tracker_data_file, + columns_synonyms = synonyms_product + ) + + if (!is.null(df_raw_product)) { + df_raw_product <- df_raw_product %>% dplyr::mutate(file_name = tracker_name) + } else { + logDebug("Empty product data") + } + + logDebug( + "df_raw_product dim: ", + dim(df_raw_product) %>% as.data.frame(), + "." 
+ ) + + # product set sensitive column to NA and add tracker file name as a column + if (!is.null(df_raw_product)) { + export_data_as_parquet( + data = df_raw_product, + filename = tracker_name, + output_root = output_root, + suffix = "_product_raw" + ) + } else { + logWarn("No product data in the file") + } + logDebug("Finish process_product_data.") + } diff --git a/man/process_patient_data.Rd b/man/process_patient_data.Rd new file mode 100644 index 0000000..e1334b2 --- /dev/null +++ b/man/process_patient_data.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_scipt_1.R +\name{process_patient_data} +\alias{process_patient_data} +\title{Extract patient data.} +\usage{ +process_patient_data( + tracker_name, + tracker_data_file, + output_root, + synonyms_patient +) +} +\arguments{ +\item{tracker_name}{Filename without extension.} + +\item{tracker_data_file}{Filename of the tracker.} + +\item{output_root}{Directory for storing extracted patient data.} + +\item{synonyms_patient}{Synonyms for patient data header names.} +} +\description{ +Extract patient data. +} diff --git a/man/process_product_data.Rd b/man/process_product_data.Rd new file mode 100644 index 0000000..0109c18 --- /dev/null +++ b/man/process_product_data.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_scipt_1.R +\name{process_product_data} +\alias{process_product_data} +\title{Extract product data.} +\usage{ +process_product_data( + tracker_name, + tracker_data_file, + output_root, + synonyms_product +) +} +\arguments{ +\item{tracker_name}{Filename without extension.} + +\item{tracker_data_file}{Filename of the tracker.} + +\item{output_root}{Directory for storing extracted product data.} + +\item{synonyms_product}{Synonyms for product data header names.} +} +\description{ +Extract product data. +} diff --git a/man/process_tracker_file.Rd b/man/process_tracker_file.Rd new file mode 100644 index 0000000..b9728ae --- /dev/null +++ b/man/process_tracker_file.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_scipt_1.R +\name{process_tracker_file} +\alias{process_tracker_file} +\title{Process a single tracker file and extract patient and product data.} +\usage{ +process_tracker_file(paths, tracker_file, tracker_name) +} +\arguments{ +\item{paths}{a list with the paths to the tracker root dir, the patient and product output dir and the root output dir.} + +\item{tracker_file}{filename.} + +\item{tracker_name}{filename without extension.} +} +\description{ +Process a single tracker file and extract patient and product data. 
+} diff --git a/renv.lock b/renv.lock index 9a44c60..5cb4660 100644 --- a/renv.lock +++ b/renv.lock @@ -65,7 +65,7 @@ }, "R.utils": { "Package": "R.utils", - "Version": "2.12.2", + "Version": "2.12.3", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -76,7 +76,7 @@ "tools", "utils" ], - "Hash": "325f01db13da12c04d8f6e7be36ff514" + "Hash": "3dc2829b790254bfba21e60965787651" }, "R6": { "Package": "R6", @@ -101,7 +101,7 @@ }, "arrow": { "Package": "arrow", - "Version": "13.0.0.1", + "Version": "14.0.0.2", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -119,7 +119,7 @@ "utils", "vctrs" ], - "Hash": "5f5124b727a451be32d23dcd2c41ebe8" + "Hash": "042f2ee2286a91abe5a3d66c9be92380" }, "askpass": { "Package": "askpass", @@ -247,14 +247,14 @@ }, "cli": { "Package": "cli", - "Version": "3.6.1", + "Version": "3.6.2", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R", "utils" ], - "Hash": "89e6d8219950eac806ae0c489052048a" + "Hash": "1216ac65ac55ec0058a6f75d7ca0fd52" }, "clipr": { "Package": "clipr", @@ -266,6 +266,16 @@ ], "Hash": "3f038e5ac7f41d4ac41ce658c85e3042" }, + "codetools": { + "Package": "codetools", + "Version": "0.2-19", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "c089a619a7fae175d149d89164f8c7d8" + }, "commonmark": { "Package": "commonmark", "Version": "1.9.0", @@ -275,13 +285,13 @@ }, "cpp11": { "Package": "cpp11", - "Version": "0.4.6", + "Version": "0.4.7", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R" ], - "Hash": "707fae4bbf73697ec8d85f9d7076c061" + "Hash": "5a295d7d963cc5035284dcdbaf334f4e" }, "crayon": { "Package": "crayon", @@ -321,14 +331,14 @@ }, "data.table": { "Package": "data.table", - "Version": "1.14.8", + "Version": "1.14.10", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R", "methods" ], - "Hash": "b4c06e554f33344e044ccd7fdca750a9" + "Hash": "6ea17a32294d8ca00455825ab0cf71b9" }, "desc": { "Package": "desc", @@ -426,7 +436,7 @@ }, "dplyr": { "Package": "dplyr", - "Version": "1.1.3", + "Version": "1.1.4", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -445,7 +455,7 @@ "utils", "vctrs" ], - "Hash": "e85ffbebaad5f70e1a2e2ef4302b4949" + "Hash": "fedd9d00c2944ff00a0e2696ccf048ec" }, "ellipsis": { "Package": "ellipsis", @@ -471,7 +481,7 @@ }, "fansi": { "Package": "fansi", - "Version": "1.0.5", + "Version": "1.0.6", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -479,7 +489,7 @@ "grDevices", "utils" ], - "Hash": "3e8583a60163b4bc1a80016e63b9959e" + "Hash": "962174cf2aeb5b9eea581522286a911f" }, "fastmap": { "Package": "fastmap", @@ -511,6 +521,37 @@ ], "Hash": "47b5f30c720c23999b913a1a635cf0bb" }, + "furrr": { + "Package": "furrr", + "Version": "0.3.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "future", + "globals", + "lifecycle", + "purrr", + "rlang", + "vctrs" + ], + "Hash": "da7a4c32196cb2262a41dd5a25d486ff" + }, + "future": { + "Package": "future", + "Version": "1.33.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "digest", + "globals", + "listenv", + "parallel", + "parallelly", + "utils" + ], + "Hash": "e57e292737f7a4efa9d8a91c5908222c" + }, "generics": { "Package": "generics", "Version": "0.1.3", @@ -563,6 +604,17 @@ ], "Hash": "ab08ac61f3e1be454ae21911eb8bc2fe" }, + "globals": { + "Package": "globals", + "Version": "0.16.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "codetools" + ], + "Hash": 
"baa9585ab4ce47a9f4618e671778cc6f" + }, "glue": { "Package": "glue", "Version": "1.6.2", @@ -709,13 +761,13 @@ }, "jsonlite": { "Package": "jsonlite", - "Version": "1.8.7", + "Version": "1.8.8", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "methods" ], - "Hash": "266a20443ca13c65688b2116d5220f76" + "Hash": "e1b9c55281c5adc4dd113652d9e26768" }, "knitr": { "Package": "knitr", @@ -772,6 +824,16 @@ ], "Hash": "b8552d117e1b808b09a832f589b79035" }, + "listenv": { + "Package": "listenv", + "Version": "0.9.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "4fbd3679ec8ee169ba28d4b1ea7d0e8f" + }, "lubridate": { "Package": "lubridate", "Version": "1.9.3", @@ -855,6 +917,18 @@ ], "Hash": "c03b4c18d42da881fb8e15a085c2b9d6" }, + "parallelly": { + "Package": "parallelly", + "Version": "1.36.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "parallel", + "tools", + "utils" + ], + "Hash": "bca377e1c87ec89ebed77bba00635b2e" + }, "pillar": { "Package": "pillar", "Version": "1.9.0", @@ -998,16 +1072,29 @@ }, "progress": { "Package": "progress", - "Version": "1.2.2", + "Version": "1.2.3", "Source": "Repository", "Repository": "CRAN", "Requirements": [ + "R", "R6", "crayon", "hms", "prettyunits" ], - "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061" + "Hash": "f4625e061cb2865f111b47ff163a5ca6" + }, + "progressr": { + "Package": "progressr", + "Version": "0.14.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "digest", + "utils" + ], + "Hash": "ac50c4ffa8f6a46580dd4d7813add3c4" }, "promises": { "Package": "promises", @@ -1347,7 +1434,7 @@ }, "stringi": { "Package": "stringi", - "Version": "1.7.12", + "Version": "1.8.3", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1356,11 +1443,11 @@ "tools", "utils" ], - "Hash": "ca8bd84263c77310739d2cf64d84d7c9" + "Hash": "058aebddea264f4c99401515182e656a" }, "stringr": { "Package": "stringr", - "Version": "1.5.0", + "Version": "1.5.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1373,7 +1460,7 @@ "stringi", "vctrs" ], - "Hash": "671a4d384ae9d32fc47a14e98bfa3dc8" + "Hash": "960e2ae9e09656611e0b8214ad543207" }, "styler": { "Package": "styler", @@ -1608,7 +1695,7 @@ }, "vctrs": { "Package": "vctrs", - "Version": "0.6.4", + "Version": "0.6.5", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1618,11 +1705,11 @@ "lifecycle", "rlang" ], - "Hash": "266c1ca411266ba8f365fcc726444b87" + "Hash": "c03fa420630029418f7e6da3667aac4a" }, "vroom": { "Package": "vroom", - "Version": "1.6.4", + "Version": "1.6.5", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1644,7 +1731,7 @@ "vctrs", "withr" ], - "Hash": "9db52c1656cf19c124f93124ea57f0fd" + "Hash": "390f9315bc0025be03012054103d227c" }, "waldo": { "Package": "waldo", @@ -1697,14 +1784,16 @@ }, "xml2": { "Package": "xml2", - "Version": "1.3.5", + "Version": "1.3.6", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R", - "methods" + "cli", + "methods", + "rlang" ], - "Hash": "6c40e5cfcc6aefd88110666e18c31f40" + "Hash": "1d0336142f4cd25d8d23cd3ba7a8fb61" }, "xopen": { "Package": "xopen", @@ -1731,10 +1820,10 @@ }, "yaml": { "Package": "yaml", - "Version": "2.3.7", + "Version": "2.3.8", "Source": "Repository", "Repository": "CRAN", - "Hash": "0d0056cc5383fbc240ccd0cb584bf436" + "Hash": "29240487a071f535f5e5d5a323b7afbd" }, "zip": { "Package": "zip", diff --git a/scripts/run_script_1_extract_raw_data.R 
b/scripts/run_script_1_extract_raw_data.R index 5f6f5c3..d4961ca 100644 --- a/scripts/run_script_1_extract_raw_data.R +++ b/scripts/run_script_1_extract_raw_data.R @@ -1,168 +1,31 @@ options(readxl.show_progress = FALSE) - -main <- function() { - paths <- init_paths(c("patient_data_raw", "product_data_raw"), delete = TRUE) - setup_logger(paths$output_root, "script1") - tracker_files <- get_files(paths$tracker_root) - logInfo( - "Found ", - length(tracker_files), - " xlsx files under ", - paths$tracker_root, - "." - ) - - synonyms <- get_synonyms() - - logInfo("Start processing tracker files.") - - for (i in seq_along(tracker_files)) { - tracker_file <- tracker_files[i] - tracker_name <- tools::file_path_sans_ext(basename(tracker_file)) - tryCatch( - process_tracker_file(paths, tracker_file, tracker_name, synonyms), - error = function(e) { - logError("Could not process ", tracker_name, ". Error = ", e$message, ".") - }, - warning = function(w) { - logWarn("Could not process ", tracker_name, ". Warning = ", w$message, ".") - } - ) - cat(paste("Processed ", i, " of ", length(tracker_files), " (", round(i / length(tracker_files) * 100, 0), "%) tracker files.\n")) - } - logInfo("Finish processing all tracker files.") -} - - -process_tracker_file <- function(paths, tracker_file, tracker_name, synonyms) { - tracker_data_file <- - file.path(paths$tracker_root, tracker_file) - logDebug("Start process_tracker_file.") - logInfo( - "Current file: ", - tracker_name +future::plan(future::multisession, workers = 4) + +Sys.setenv(A4D_DATA_ROOT = "data") +paths <- a4d::init_paths(c("patient_data_raw", "product_data_raw"), delete = TRUE) +setup_logger(paths$output_root, "script1") +tracker_files <- a4d::get_files(paths$tracker_root) +logInfo( + "Found ", + length(tracker_files), + " xlsx files under ", + paths$tracker_root, + "." +) + +logInfo("Start processing tracker files.") + +progressr::with_progress({ + p <- progressr::progressor(steps = length(tracker_files)) + + result <- furrr::future_map( + tracker_files, + a4d::process_tracker_file, + paths=paths, + p=p ) +}) - - logfile <- paste0(tracker_name, "_", "patient") - with_file_logger(logfile, - { - tryCatch( - process_patient_data( - tracker_name = tracker_name, - tracker_data_file = tracker_data_file, - output_root = paths$patient_data_raw, - synonyms_patient = synonyms$patient - ), - error = function(e) { - logError("Could not process patient data. Error = ", e$message, ".") - }, - warning = function(w) { - logWarn("Could not process patient data. Warning = ", w$message, ".") - } - ) - }, - output_root = paths$output_root - ) - - logfile <- paste0(tracker_name, "_", "product") - - with_file_logger(logfile, - { - tryCatch( - process_product_data( - tracker_name = tracker_name, - tracker_data_file = tracker_data_file, - output_root = paths$product_data_raw, - synonyms_product = synonyms$product - ), - error = function(e) { - logError("Could not process product data. Error = ", e$message, ".") - }, - warning = function(w) { - logWarn("Could not process product data. 
Warning = ", w$message, ".") - } - ) - }, - output_root = paths$output_root - ) - - logInfo("Finish process_tracker_file.") -} - - -process_patient_data <- - function(tracker_name, - tracker_data_file, - output_root, - synonyms_patient) { - logDebug("Start process_patient_data.") - - df_raw_patient <- - reading_patient_data( - tracker_data_file = tracker_data_file, - columns_synonyms = synonyms_patient - ) - - df_raw_patient <- df_raw_patient %>% dplyr::mutate(file_name = tracker_name) - - logDebug( - "df_raw_patient dim: ", - dim(df_raw_patient) %>% as.data.frame(), - "." - ) - - export_data_as_parquet( - data = df_raw_patient, - filename = tracker_name, - output_root = output_root, - suffix = "_patient_raw" - ) - - logInfo("Finish process_patient_data.") - } - - -process_product_data <- - function(tracker_name, - tracker_data_file, - output_root, - synonyms_product) { - logDebug("Start process_product_data.") - - df_raw_product <- - reading_product_data_step1( - tracker_data_file = tracker_data_file, - columns_synonyms = synonyms_product - ) - - if (!is.null(df_raw_product)) { - df_raw_product <- df_raw_product %>% dplyr::mutate(file_name = tracker_name) - } else { - logDebug("Empty product data") - } - - logDebug( - "df_raw_product dim: ", - dim(df_raw_product) %>% as.data.frame(), - "." - ) - - # product set sensitive column to NA and add tracker file name as a column - if (!is.null(df_raw_product)) { - export_data_as_parquet( - data = df_raw_product, - filename = tracker_name, - output_root = output_root, - suffix = "_product_raw" - ) - } else { - logWarn("No product data in the file") - } - logDebug("Finish process_product_data.") - } - -# profvis(main()) -main() +logInfo("Finish processing all tracker files.") clearLoggers() From 7c5d157f7ffd538d59515d8961432cfdbf9439b2 Mon Sep 17 00:00:00 2001 From: Michael Aydinbas Date: Thu, 28 Dec 2023 21:27:53 +0100 Subject: [PATCH 2/8] working future code for script 1 and 2 --- DESCRIPTION | 4 +- NAMESPACE | 3 + R/create_table_patient_data.R | 4 +- R/create_table_patient_data_changes_only.R | 4 +- R/create_table_patient_data_static.R | 4 +- R/create_table_product_data.R | 42 +- R/helper_main.R | 18 +- R/helper_patient_data_fix.R | 38 +- R/helper_product_data.R | 12 +- R/helper_read_patient_data.R | 32 +- R/helper_scipt_1.R | 93 +++-- R/helper_script_2.R | 364 ++++++++++++++++++ R/link_product_patient.R | 12 +- R/logger.R | 19 +- R/read_cleaned_patient_data.R | 4 +- R/read_patient_data.R | 26 +- R/read_product_data.R | 60 +-- R/sysdata.rda | Bin 2763 -> 2763 bytes man/process_patient_file.Rd | 25 ++ man/process_product_file.Rd | 18 + man/process_tracker_file.Rd | 8 +- reference_data/build_package_data.R | 2 +- scripts/run_script_1_extract_raw_data.R | 18 +- scripts/run_script_2_clean_data.R | 422 +++------------------ scripts/run_script_3_create_tables.R | 32 +- 25 files changed, 678 insertions(+), 586 deletions(-) create mode 100644 R/helper_script_2.R create mode 100644 man/process_patient_file.Rd create mode 100644 man/process_product_file.Rd diff --git a/DESCRIPTION b/DESCRIPTION index a1d7052..c273a8f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,8 +8,7 @@ License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 -Depends: - ParallelLogger +Depends: Imports: data.table, digest, @@ -17,6 +16,7 @@ Imports: here, lubridate, openxlsx, + ParallelLogger, readr, readxl, stringr, diff --git a/NAMESPACE b/NAMESPACE index f8e779f..ca93149 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,7 +21,9 @@ 
export(harmonize_patient_data_columns) export(id_2_county_hospisal) export(init_paths) export(process_patient_data) +export(process_patient_file) export(process_product_data) +export(process_product_file) export(process_tracker_file) export(read_cleaned_patient_data) export(read_column_synonyms) @@ -29,6 +31,7 @@ export(report_empty_intersections) export(sanitize_str) export(select_A4D_directory) export(set_a4d_data_root) +export(setup_file_logger) export(setup_logger) export(with_file_logger) importFrom(data.table,"%like%") diff --git a/R/create_table_patient_data.R b/R/create_table_patient_data.R index 3f5c864..c6793ed 100644 --- a/R/create_table_patient_data.R +++ b/R/create_table_patient_data.R @@ -9,7 +9,7 @@ #' @param input_root root directory of the input CSV files. #' @param output_root root directory of the output folder. create_table_patient_data_monthly <- function(patient_data_files, input_root, output_root) { - logInfo("Start creating single csv for table patient_data_monthly.") + ParallelLogger::logInfo("Start creating single csv for table patient_data_monthly.") # THERE MIGHT BE MONTHLY COLUMNS MISSING - PLEASE ADD THEM dynamic_patient_columns <- @@ -58,5 +58,5 @@ create_table_patient_data_monthly <- function(patient_data_files, input_root, ou suffix = "" ) - logInfo("Finish creating single csv for table patient_data_monthly.") + ParallelLogger::logInfo("Finish creating single csv for table patient_data_monthly.") } diff --git a/R/create_table_patient_data_changes_only.R b/R/create_table_patient_data_changes_only.R index 6a06fe8..1a4178c 100644 --- a/R/create_table_patient_data_changes_only.R +++ b/R/create_table_patient_data_changes_only.R @@ -16,7 +16,7 @@ create_table_longitudinal_data <- output_root, variable, name) { - logInfo("Start creating single csv for table create_table_longitudinal_data and variable ", variable) + ParallelLogger::logInfo("Start creating single csv for table create_table_longitudinal_data and variable ", variable) dynamic_patient_columns <- c( @@ -79,5 +79,5 @@ create_table_longitudinal_data <- suffix = "" ) - logInfo("Finish creating single csv for table create_table_longitudinal_data and variable ", variable) + ParallelLogger::logInfo("Finish creating single csv for table create_table_longitudinal_data and variable ", variable) } diff --git a/R/create_table_patient_data_static.R b/R/create_table_patient_data_static.R index aaf180d..55c8c1d 100644 --- a/R/create_table_patient_data_static.R +++ b/R/create_table_patient_data_static.R @@ -9,7 +9,7 @@ #' @param input_root root directory of the input CSV files. #' @param output_root root directory of the output folder. 
 create_table_patient_data_static <- function(patient_data_files, input_root, output_root) {
-    logInfo("Start creating single csv for table patient_data_static.")
+    ParallelLogger::logInfo("Start creating single csv for table patient_data_static.")

     # THERE MIGHT BE STATIC COLUMNS MISSING - PLEASE ADD THEM
     static_patient_columns <-
@@ -59,5 +59,5 @@ create_table_patient_data_static <- function(patient_data_files, input_root, out
         suffix = "_static"
     )

-    logInfo("Finish creating single csv for table patient_data_static.")
+    ParallelLogger::logInfo("Finish creating single csv for table patient_data_static.")
 }
diff --git a/R/create_table_product_data.R b/R/create_table_product_data.R
index aaa6448..4ee7eaa 100644
--- a/R/create_table_product_data.R
+++ b/R/create_table_product_data.R
@@ -17,7 +17,7 @@
 #' create_table_product_data("path/to/input/directory", "path/to/output/directory")
 #' }
 create_table_product_data <- function(input_root, output_root) {
-    logInfo("Start creating single file for table product_data.")
+    ParallelLogger::logInfo("Start creating single file for table product_data.")

     # Get a list of all CSV files in the input_root directory
     files <- list.files(input_root, pattern = "*.parquet", full.names = TRUE)
@@ -25,7 +25,7 @@ create_table_product_data <- function(input_root, output_root) {
     # Read all CSV files and store them in a list
     data_list <- lapply(files, function(x) arrow::read_parquet(x))

-    logInfo(length(data_list), " files will be processed for creating the single file for table product_data.")
+    ParallelLogger::logInfo(length(data_list), " files will be processed for creating the single file for table product_data.")

     # Get the union of all column names
     all_names <- unique(unlist(lapply(data_list, colnames)))
@@ -39,25 +39,25 @@ create_table_product_data <- function(input_root, output_root) {
     # Merge all data frames
     merged_data <- do.call(rbind, data_list)

-    logDebug("Copying original parient IDs...")
+    ParallelLogger::logDebug("Copying original patient IDs...")
     merged_data$orig_product_released_to <- merged_data$product_released_to
-    logDebug("Trying to fix patient IDs...")
+    ParallelLogger::logDebug("Trying to fix patient IDs...")
     merged_data$product_released_to <- sapply(merged_data$product_released_to, fix_id)

-    logDebug("Extracting product_county and product_hospisal from patients IDs...")
+    ParallelLogger::logDebug("Extracting product_country and product_hospital from patient IDs...")
     merged_data <- id_2_county_hospisal(
         merged_data,
         "product_released_to", "product_country", "product_hospital"
     )

-    logDebug("Calculating the most frequent 'product_hospital' for each 'file_name'...")
+    ParallelLogger::logDebug("Calculating the most frequent 'product_hospital' for each 'file_name'...")
     tryCatch(
         {
             merged_data <- calculate_most_frequent(merged_data, "file_name", "product_hospital", "table_hospital")
         },
         error = function(e) {
-            logError("Error in calculating the most frequent 'product_hospital': ", e)
+            ParallelLogger::logError("Error in calculating the most frequent 'product_hospital': ", e)
         }
     )

     tryCatch(
         {
             merged_data <- calculate_most_frequent(merged_data, "file_name", "product_country", "table_country")
         },
         error = function(e) {
-            logError("Error in calculating the most frequent 'product_country': ", e)
+            ParallelLogger::logError("Error in calculating the most frequent 'product_country': ", e)
         }
     )

     # Reorder, add, and ensures the correct data type for each column according to the list of fields
     merged_data <- preparing_product_fields(merged_data)

-    logDebug("Checking 'table_country' for each 'file_name'...")
+    ParallelLogger::logDebug("Checking 'table_country' for each 'file_name'...")
     report_empty_intersections(merged_data, "file_name", "table_country")

     # Write the merged and processed data to a file in the output_root directory
@@ -85,7 +85,7 @@ create_table_product_data <- function(input_root, output_root) {
         suffix = ""
     )

-    logInfo("Finish creating single file for table product_data.")
+    ParallelLogger::logInfo("Finish creating single file for table product_data.")
 }
@@ -168,7 +168,7 @@ preparing_product_fields <- function(merged_data) {
         "table_hospital" = "character"
     )

-    logInfo("Start processing fields for the single csv product_data...")
+    ParallelLogger::logInfo("Start processing fields for the single csv product_data...")

     # Check if all fields are present in merged_data
     missing_fields <- setdiff(names(fields), names(merged_data))
@@ -186,7 +186,7 @@
                 merged_data[[field]] <- suppressWarnings(as.Date(original_values))
                 incorrect_rows <- which(is.na(merged_data[[field]]) & !is.na(original_values))
                 if (length(incorrect_rows) > 0) {
-                    logWarn(paste(
+                    ParallelLogger::logWarn(paste(
                         "In", field, "incorrect date values were replaced with", ERROR_VAL_DATE,
                         "in", length(incorrect_rows), "rows:",
                         paste(incorrect_rows, collapse = ", ")
@@ -198,7 +198,7 @@
                 merged_data[[field]] <- suppressWarnings(as.numeric(original_values))
                 incorrect_rows <- which(is.na(merged_data[[field]]) & !is.na(original_values))
                 if (length(incorrect_rows) > 0) {
-                    logWarn(paste(
+                    ParallelLogger::logWarn(paste(
                         "In", field, "incorrect numeric values were replaced with", ERROR_VAL_NUMERIC,
                         "in", length(incorrect_rows), "rows:",
                         paste(incorrect_rows, collapse = ", ")
@@ -210,7 +210,7 @@
                 merged_data[[field]] <- suppressWarnings(as.integer(original_values))
                 incorrect_rows <- which(is.na(merged_data[[field]]) & !is.na(original_values))
                 if (length(incorrect_rows) > 0) {
-                    logWarn(paste(
+                    ParallelLogger::logWarn(paste(
                         "In", field, "incorrect integer values were replaced with", ERROR_VAL_NUMERIC,
                         "in", length(incorrect_rows), "rows:",
                         paste(incorrect_rows, collapse = ", ")
@@ -222,7 +222,7 @@
                 merged_data[[field]] <- as.character(original_values)
                 incorrect_rows <- which(is.na(merged_data[[field]]) & !is.na(original_values))
                 if (length(incorrect_rows) > 0) {
-                    logWarn(paste(
+                    ParallelLogger::logWarn(paste(
                         "In", field, "incorrect character values were replaced with", ERROR_VAL_CHARACTER,
                         "in", length(incorrect_rows), "rows:",
                         paste(incorrect_rows, collapse = ", ")
@@ -231,19 +231,19 @@
                 }
             }
         }, warning = function(w) {
-            logError(paste("Warning in converting", field, ": ", w))
+            ParallelLogger::logWarn(paste("Warning in converting", field, ": ", w))
         }, error = function(e) {
-            logWarn(paste("Error in converting", field, ": ", e))
+            ParallelLogger::logError(paste("Error in converting", field, ": ", e))
         }, finally = {
-            logDebug(paste("Finished converting", field))
+            ParallelLogger::logDebug(paste("Finished converting", field))
         })
     }

     # Reorder the columns according to the list of fields
-    logInfo("Reorder the columns according to the list of fields...")
+    ParallelLogger::logInfo("Reorder the columns according to the list of fields...")
     merged_data <- merged_data[, 
c(names(fields), setdiff(names(merged_data), names(fields)))] - logInfo("Finished processing fields for the single csv product_data.") + ParallelLogger::logInfo("Finished processing fields for the single csv product_data.") return(merged_data) } @@ -313,7 +313,7 @@ report_empty_intersections <- function(df, row_category, col_category) { df_row_sums <- df_row_sums[df_row_sums$sum == 0, ] if (nrow(df_row_sums) > 0) { - logWarn( + ParallelLogger::logWarn( "The number of ", row_category, " with empty ", col_category, " is ", nrow(df_row_sums), ": ", paste(df_row_sums$row_name, sep = "", collapse = ", ") diff --git a/R/helper_main.R b/R/helper_main.R index ca8d1a3..b5cd0a1 100644 --- a/R/helper_main.R +++ b/R/helper_main.R @@ -139,7 +139,7 @@ read_column_synonyms <- function(synonym_file, path_prefixes = c("reference_data #' ) #' } export_data <- function(data, filename, output_root, suffix) { - logDebug("Start export_data. Suffix = ", suffix, ".") + ParallelLogger::logDebug("Start export_data. Suffix = ", suffix, ".") data %>% write.csv( file = @@ -156,7 +156,7 @@ export_data <- function(data, filename, output_root, suffix) { fileEncoding = "UTF-16LE", quote = T ) - logInfo("Finish export_data. Suffix = ", suffix, ".") + ParallelLogger::logInfo("Finish export_data. Suffix = ", suffix, ".") } @@ -177,12 +177,12 @@ export_data <- function(data, filename, output_root, suffix) { #' ) #' } export_data_as_parquet <- function(data, filename, output_root, suffix) { - logDebug("Start export_data. Suffix = ", suffix, ".") + ParallelLogger::logDebug("Start export_data. Suffix = ", suffix, ".") data %>% arrow::write_parquet( sink = file.path(output_root, paste0(filename, suffix, ".parquet")), ) - logInfo("Finish export_data. Suffix = ", suffix, ".") + ParallelLogger::logInfo("Finish export_data. Suffix = ", suffix, ".") } @@ -193,7 +193,7 @@ export_data_as_parquet <- function(data, filename, output_root, suffix) { #' #' @return tibble with patient data read_raw_csv <- function(file) { - logDebug("Start reading data with read_csv.") + ParallelLogger::logDebug("Start reading data with read_csv.") df_patient_raw <- readr::read_csv( file, name_repair = "check_unique", @@ -202,9 +202,9 @@ read_raw_csv <- function(file) { col_types = readr::cols(.default = "c"), locale = readr::locale(encoding = "UTF-16LE") ) - logDebug("Finished loading data with read_csv.") - logInfo("Dim: ", dim(df_patient_raw)) - logInfo("Columns: ", spec(df_patient_raw)) + ParallelLogger::logDebug("Finished loading data with read_csv.") + ParallelLogger::logInfo("Dim: ", dim(df_patient_raw)) + ParallelLogger::logInfo("Columns: ", spec(df_patient_raw)) df_patient_raw } @@ -218,6 +218,6 @@ read_raw_csv <- function(file) { #' @return A named character vector with all allowed provinces. 
get_allowed_provinces <- function() { ## Should new countries and provinces be added, update the YAML file - provinces <- yaml::read_yaml("reference_data/provinces/allowed_provinces.yaml") %>% unlist() + provinces <- yaml::read_yaml("reference_data/provinces/allowed_provinces.yaml") |> unlist() return(provinces) } diff --git a/R/helper_patient_data_fix.R b/R/helper_patient_data_fix.R index 05bb168..5b10d72 100644 --- a/R/helper_patient_data_fix.R +++ b/R/helper_patient_data_fix.R @@ -24,11 +24,11 @@ convert_to <- function(x, cast_fnc, error_val, col_name = "", id = "") { x <- tryCatch( cast_fnc(x), error = function(e) { - logError("Could not convert value ", x, " in column ", col_name, " for patient: ", id) + ParallelLogger::logError("Could not convert value ", x, " in column ", col_name, " for patient: ", id) x <- error_val }, warning = function(w) { - logWarn("Could not convert value ", x, " in column ", col_name, " for patient: ", id) + ParallelLogger::logWarn("Could not convert value ", x, " in column ", col_name, " for patient: ", id) x <- error_val } ) @@ -55,7 +55,7 @@ cut_numeric_value <- function(x, } if (x < min || x > max) { - logWarn( + ParallelLogger::logWarn( "Found invalid value ", x, " for column ", col_name, " outside [", min, ", ", max, "]. ", "Value was replaced with ", ERROR_VAL_NUMERIC, "." ) @@ -154,7 +154,7 @@ parse_dates <- function(date) { parsed_date <- suppressWarnings(lubridate::as_date(date)) if (is.na(parsed_date)) { - logWarn( + ParallelLogger::logWarn( "Could not parse date value ", date, ". ", "Trying to parse with lubridate::parse_date_time and orders = c('dmy', 'dmY', 'by', 'bY')." ) @@ -195,9 +195,9 @@ check_allowed_values <- function(x, valid_values, id, replace_invalid = TRUE, er valid_value_mapping <- setNames(as.list(valid_values), sanitize_str(valid_values)) if (!sanitize_str(x) %in% names(valid_value_mapping)) { - logWarn("Patient ", id, ": Value ", x, " for column ", col, " is not in the list of allowed values. ") + ParallelLogger::logWarn("Patient ", id, ": Value ", x, " for column ", col, " is not in the list of allowed values. ") if (replace_invalid) { - logInfo("Replacing ", x, " with ", error_val, ".") + ParallelLogger::logInfo("Replacing ", x, " with ", error_val, ".") return(error_val) } else { return(x) @@ -275,13 +275,13 @@ fix_age <- function(age, dob, tracker_year, tracker_month, id) { } if (is.na(age)) { - logWarn( + ParallelLogger::logWarn( "Patient ", id, ": age is missing. Using calculated age ", calc_age, " instead of original age." ) } else { if (calc_age != age) { - logWarn( + ParallelLogger::logWarn( "Patient ", id, ": age ", age, " is different from calculated age ", calc_age, ". Using calculated age instead of original age." ) @@ -289,7 +289,7 @@ fix_age <- function(age, dob, tracker_year, tracker_month, id) { } if (calc_age < 0) { - logWarn("Patient ", id, ": calculated age is negative. Something went wrong.") + ParallelLogger::logWarn("Patient ", id, ": calculated age is negative. 
Something went wrong.") calc_age <- ERROR_VAL_NUMERIC } } @@ -322,11 +322,11 @@ fix_bmi <- function(weight, height, id) { if (!is.na(weight) && weight == ERROR_VAL_CHARACTER) { - logWarn("Patient ", id, ": the weight is out of bounds.") + ParallelLogger::logWarn("Patient ", id, ": the weight is out of bounds.") } if (!is.na(height) && height == ERROR_VAL_CHARACTER) { - logWarn("Patient ", id, ": the height is out of bounds.") + ParallelLogger::logWarn("Patient ", id, ": the height is out of bounds.") } bmi } @@ -353,7 +353,7 @@ fix_sex <- function(sex, id) { ) if (!is.na(fixed_sex) && fixed_sex == ERROR_VAL_CHARACTER) { - logWarn("Patient ", id, ": sex ", sex, " is not in the list of synonyms. Replacing it with ", fixed_sex, ".") + ParallelLogger::logWarn("Patient ", id, ": sex ", sex, " is not in the list of synonyms. Replacing it with ", fixed_sex, ".") } fixed_sex } @@ -463,7 +463,7 @@ fix_testing_frequency <- function(test_frq) { } if (grepl("-", test_frq, fixed = TRUE)) { - logInfo("Found a range for testing_frequency. Replacing it with the mean.") + ParallelLogger::logInfo("Found a range for testing_frequency. Replacing it with the mean.") test_frq <- try(as.character(replace_range_with_mean(test_frq), silent = TRUE)) } @@ -492,7 +492,7 @@ replace_range_with_mean <- function(x) { #' #' @return data frame with two new columns: blood_pressure_sys_mmhg and blood_pressure_dias_mmhg. split_bp_in_sys_and_dias <- function(df) { - logInfo("Splitting blood_pressure_mmhg into blood_pressure_sys_mmhg and blood_pressure_dias_mmhg.") + ParallelLogger::logInfo("Splitting blood_pressure_mmhg into blood_pressure_sys_mmhg and blood_pressure_dias_mmhg.") df <- df %>% dplyr::mutate( blood_pressure_mmhg = dplyr::case_when( @@ -502,7 +502,7 @@ split_bp_in_sys_and_dias <- function(df) { ) if (paste(ERROR_VAL_NUMERIC, ERROR_VAL_NUMERIC, sep = "/") %in% df$blood_pressure_mmhg) { - logWarn( + ParallelLogger::logWarn( "Found invalid values for column blood_pressure_mmhg that do not follow the format X/Y. ", "Values were replaced with ", ERROR_VAL_NUMERIC, "." ) @@ -514,7 +514,7 @@ split_bp_in_sys_and_dias <- function(df) { delim = "/", names = c("blood_pressure_sys_mmhg", "blood_pressure_dias_mmhg"), ) - logDebug("Finished splitting blood_pressure_mmhg into blood_pressure_sys_mmhg and blood_pressure_dias_mmhg.") + ParallelLogger::logDebug("Finished splitting blood_pressure_mmhg into blood_pressure_sys_mmhg and blood_pressure_dias_mmhg.") df } @@ -550,12 +550,12 @@ fix_id <- function(id) { id <- stringr::str_replace(id, "-", "_") if (!grepl("^[[:upper:]]{2}_[[:upper:]]{2}[[:digit:]]{3}$", id)) { - logWarn("Patient ", id, ": id cannot be matched to a 7 letter alpha numeric code like XX_YY001. ") + ParallelLogger::logWarn("Patient ", id, ": id cannot be matched to a 7 letter alpha numeric code like XX_YY001. 
") if (stringr::str_length(id) > 8) { - logWarn("Patient ", id, ": id was truncated because it is longer than 8 characters.") + ParallelLogger::logWarn("Patient ", id, ": id was truncated because it is longer than 8 characters.") id <- stringr::str_sub(id, 1, 8) } else { - logError("Patient ", id, ": id is not valid.") + ParallelLogger::logError("Patient ", id, ": id is not valid.") id <- ERROR_VAL_CHARACTER } } diff --git a/R/helper_product_data.R b/R/helper_product_data.R index 6f04cd7..3e56f6b 100644 --- a/R/helper_product_data.R +++ b/R/helper_product_data.R @@ -83,7 +83,7 @@ get_patient_end <- function(df, j) { # @Description: Reads product data from a monthly file based on extraction logic extract_product_data <- function(monthly_tracker_df) { - logDebug("Starting extract_product_data.") + ParallelLogger::logDebug("Starting extract_product_data.") start_df_msd <- NULL end_df_msd <- NULL @@ -103,7 +103,7 @@ extract_product_data <- function(monthly_tracker_df) { # Clean empty remaining first row product_data_df <- set_second_row_as_headers_and_remove_first_row(product_data_df) - logDebug("Finish extract_product_data.") + ParallelLogger::logDebug("Finish extract_product_data.") return(product_data_df) } @@ -134,7 +134,7 @@ extract_product_data <- function(monthly_tracker_df) { # column synonyms to unify column names # @columns_synonyms: Long format output of read_column_synonyms to match columns harmonize_input_data_columns <- function(product_df, columns_synonyms) { - logDebug("Start harmonize_input_data_columns.") + ParallelLogger::logDebug("Start harmonize_input_data_columns.") # In case that there is additional data in strange columns, keep only relevant columns # keep.cols <- names(product_df) %in% c("") @@ -147,14 +147,14 @@ harmonize_input_data_columns <- function(product_df, columns_synonyms) { ## report all column names which have not been found unknown_column_names <- colnames(product_df)[!colnames(product_df) %in% synonym_headers] if (length(unknown_column_names) > 0) { - logWarn("Unknown column names in sheet: ", paste(unknown_column_names, collapse = ", ")) + ParallelLogger::logWarn("Unknown column names in sheet: ", paste(unknown_column_names, collapse = ", ")) } # replacing var codes colnames_found <- match(colnames(product_df), synonym_headers, nomatch = 0) colnames(product_df)[colnames(product_df) %in% synonym_headers] <- columns_synonyms$name_clean[colnames_found] - logDebug("Finish harmonize_input_data_columns.") + ParallelLogger::logDebug("Finish harmonize_input_data_columns.") if (sum(colnames_found == 0) != 0) { "Non-matching column names found (see 0)" # SK: remove non matching column names @@ -282,7 +282,7 @@ update_receivedfrom <- function(product_df) { grepl("Balance", product_units_received, ignore.case = TRUE) & !is.na(product_received_from) ~ product_received_from )) %>% dplyr::mutate(product_units_released = ifelse(!is.na(product_received_from), NA, product_units_released)) - logInfo("The rule for the case was applied successfully- Released (product_units_released) column also includes values for Start/End Balance") + ParallelLogger::logInfo("The rule for the case was applied successfully- Released (product_units_released) column also includes values for Start/End Balance") } return(product_df) } diff --git a/R/helper_read_patient_data.R b/R/helper_read_patient_data.R index cca5779..8f61b04 100644 --- a/R/helper_read_patient_data.R +++ b/R/helper_read_patient_data.R @@ -1,7 +1,7 @@ # extracting country and clinic code from patient ID # expects that patient 
ID has a certain format extract_country_clinic_code <- function(patient_data) { - logDebug("Start extract_country_clinic_code.") + ParallelLogger::logDebug("Start extract_country_clinic_code.") patient_ids <- patient_data["id"] %>% dplyr::filter(id != "0") %>% tidyr::drop_na() %>% @@ -19,9 +19,9 @@ extract_country_clinic_code <- function(patient_data) { clinic_code <- names(sort(table(patient_ids$clinic), decreasing = T))[1] - logDebug("country_code = ", country_code, ".") - logDebug("clinic_code = ", clinic_code, ".") - logDebug("Finish extract_country_clinic_code.") + ParallelLogger::logDebug("country_code = ", country_code, ".") + ParallelLogger::logDebug("clinic_code = ", clinic_code, ".") + ParallelLogger::logDebug("Finish extract_country_clinic_code.") return(list("country_code" = country_code, "clinic_code" = clinic_code)) } @@ -40,9 +40,9 @@ extract_country_clinic_code <- function(patient_data) { #' @return data.frame with the patient data #' @export extract_patient_data <- function(tracker_data_file, sheet, year) { - logDebug("Start extract_patient_data for sheet = ", sheet, ".") + ParallelLogger::logDebug("Start extract_patient_data for sheet = ", sheet, ".") - logDebug("Start openxlsx::read.xlsx to get tracker_data.") + ParallelLogger::logDebug("Start openxlsx::read.xlsx to get tracker_data.") tracker_data <- openxlsx::read.xlsx( xlsxFile = tracker_data_file, sheet = sheet, @@ -59,7 +59,7 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { # col_names = F, # .name_repair = "unique_quiet" # ) - logDebug("Finish openxlsx::read.xlsx.") + ParallelLogger::logDebug("Finish openxlsx::read.xlsx.") # Assumption: first column is always empty until patient data begins patient_data_range <- which(!is.na(tracker_data[, 1])) @@ -80,8 +80,8 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { row_max <- row_max + 1 } - logInfo("Patient data found in rows ", row_min, " to ", row_max, ".") - logDebug("Start readxl::read_excel to get patient data.") + ParallelLogger::logInfo("Patient data found in rows ", row_min, " to ", row_max, ".") + ParallelLogger::logDebug("Start readxl::read_excel to get patient data.") df_patient <- readxl::read_excel( path = tracker_data_file, sheet = sheet, @@ -91,11 +91,11 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { col_types = c("text"), .name_repair = "unique_quiet" ) - logDebug("Finish readxl::read_excel.") + ParallelLogger::logDebug("Finish readxl::read_excel.") if (header_cols[2] == header_cols_2[2]) { # take into account that date info gets separated from the updated values (not in the same row, usually in the bottom row) - logInfo("Read in multiline header.") + ParallelLogger::logInfo("Read in multiline header.") diff_colnames <- which((header_cols != header_cols_2)) header_cols[diff_colnames] <- @@ -106,7 +106,7 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { } colnames(df_patient) <- header_cols - logDebug("Found patient column names = ", paste(header_cols, collapse = ","), ".") + ParallelLogger::logDebug("Found patient column names = ", paste(header_cols, collapse = ","), ".") # delete columns without a header (=NA) df_patient <- df_patient[, !is.na(colnames(df_patient))] @@ -117,7 +117,7 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { df_patient <- df_patient[rowSums(is.na(df_patient)) != ncol(df_patient), ] - logDebug("Finish extract_patient_data.") + ParallelLogger::logDebug("Finish extract_patient_data.") df_patient } @@ -137,7 +137,7 @@ 
extract_patient_data <- function(tracker_data_file, sheet, year) {
 #' @export
 harmonize_patient_data_columns <-
     function(patient_df, columns_synonyms) {
-        logDebug("Start harmonize_patient_data_columns.")
+        ParallelLogger::logDebug("Start harmonize_patient_data_columns.")

         patient_df <- patient_df[!is.na(names(patient_df))]
@@ -151,11 +151,11 @@ harmonize_patient_data_columns <-
         mismatching_column_ids <- which(colnames_found == 0)

         if (length(mismatching_column_ids) > 0) {
-            logWarn(
+            ParallelLogger::logWarn(
                 "Non-matching column names found: ",
                 paste(colnames(patient_df)[mismatching_column_ids], collapse = ","), "."
             )
         }

-        logDebug("Finish harmonize_patient_data_columns.")
+        ParallelLogger::logDebug("Finish harmonize_patient_data_columns.")
         patient_df
     }
diff --git a/R/helper_scipt_1.R b/R/helper_scipt_1.R
index 222d5f1..f1cf102 100644
--- a/R/helper_scipt_1.R
+++ b/R/helper_scipt_1.R
@@ -1,5 +1,3 @@
-
-
 #' @title Process a single tracker file and extract patient and product data.
 #'
 #' @param tracker_file Filename of the tracker.
 #' @param paths A list with the paths to the tracker root dir, the patient and product output dir and the root output dir.
 #' @param p Progressor from the progressr package.
@@ -13,56 +11,57 @@ process_tracker_file <- function(tracker_file, paths, p) {
     synonyms <- get_synonyms()
     tracker_data_file <-
         file.path(paths$tracker_root, tracker_file)
-    logDebug("Start process_tracker_file.")
-    logInfo(
+
+    ParallelLogger::logDebug("Start process_tracker_file.")
+    ParallelLogger::logInfo(
         "Current file: ",
         tracker_name
     )

     logfile <- paste0(tracker_name, "_", "patient")
     with_file_logger(logfile,
-            {
-                tryCatch(
-                    process_patient_data(
-                        tracker_name = tracker_name,
-                        tracker_data_file = tracker_data_file,
-                        output_root = paths$patient_data_raw,
-                        synonyms_patient = synonyms$patient
-                    ),
-                    error = function(e) {
-                        logError("Could not process patient data. Error = ", e$message, ".")
-                    },
-                    warning = function(w) {
-                        logWarn("Could not process patient data. Warning = ", w$message, ".")
-                    }
-                )
-            },
-            output_root = paths$output_root
+        {
+            tryCatch(
+                process_patient_data(
+                    tracker_name = tracker_name,
+                    tracker_data_file = tracker_data_file,
+                    output_root = paths$patient_data_raw,
+                    synonyms_patient = synonyms$patient
+                ),
+                error = function(e) {
+                    ParallelLogger::logError("Could not process patient data. Error = ", e$message, ".")
+                },
+                warning = function(w) {
+                    ParallelLogger::logWarn("Could not process patient data. 
Warning = ", w$message, ".") + } + ) + }, + output_root = paths$output_root ) - logInfo("Finish process_tracker_file.") + ParallelLogger::logDebug("Finish process_tracker_file.") } @@ -79,7 +78,7 @@ process_patient_data <- tracker_data_file, output_root, synonyms_patient) { - logDebug("Start process_patient_data.") + ParallelLogger::logDebug("Start process_patient_data.") df_raw_patient <- reading_patient_data( @@ -89,7 +88,7 @@ process_patient_data <- df_raw_patient <- df_raw_patient %>% dplyr::mutate(file_name = tracker_name) - logDebug( + ParallelLogger::logDebug( "df_raw_patient dim: ", dim(df_raw_patient) %>% as.data.frame(), "." @@ -102,7 +101,7 @@ process_patient_data <- suffix = "_patient_raw" ) - logInfo("Finish process_patient_data.") + ParallelLogger::logDebug("Finish process_patient_data.") } @@ -119,7 +118,7 @@ process_product_data <- tracker_data_file, output_root, synonyms_product) { - logDebug("Start process_product_data.") + ParallelLogger::logDebug("Start process_product_data.") df_raw_product <- reading_product_data_step1( @@ -130,10 +129,10 @@ process_product_data <- if (!is.null(df_raw_product)) { df_raw_product <- df_raw_product %>% dplyr::mutate(file_name = tracker_name) } else { - logDebug("Empty product data") + ParallelLogger::logDebug("Empty product data") } - logDebug( + ParallelLogger::logDebug( "df_raw_product dim: ", dim(df_raw_product) %>% as.data.frame(), "." @@ -148,7 +147,7 @@ process_product_data <- suffix = "_product_raw" ) } else { - logWarn("No product data in the file") + ParallelLogger::logWarn("No product data in the file") } - logDebug("Finish process_product_data.") + ParallelLogger::logDebug("Finish process_product_data.") } diff --git a/R/helper_script_2.R b/R/helper_script_2.R new file mode 100644 index 0000000..035b5db --- /dev/null +++ b/R/helper_script_2.R @@ -0,0 +1,364 @@ +#' Process and clean patient raw data +#' +#' @description +#' Read in the output of the first script as parquet (all character). +#' Merge extracted patient data with the metadata schema that contains all +#' necessary columns. Process each variable in three steps: +#' 1. Fix any problems with character representation. +#' 2. Convert column to target data type, for example a date or number. +#' 3. Apply any post-processing logic for the target data type. +#' +#' +#' @param patient_file Path to patient raw parquet file from first script. +#' @param paths List with paths to patient_data_cleaned and product_data_cleaned folders. +#' @param p progressor from progressr package. +#' +#' @export +process_patient_file <- function(patient_file, paths, p) { + p() + + ERROR_VAL_NUMERIC <<- 999999 + ERROR_VAL_CHARACTER <<- "Undefined" + ERROR_VAL_DATE <<- "9999-09-09" + + patient_file_name <- tools::file_path_sans_ext(basename(patient_file)) + patient_file_path <- + file.path(paths$tracker_root, patient_file) + output_root <- paths$patient_data_cleaned + + ParallelLogger::logDebug("Start process_patient_file.") + ParallelLogger::logInfo( + "Current file: ", + patient_file_name + ) + + logfile <- paste0(patient_file_name) + + with_file_logger(logfile, + { + tryCatch( + process_patient_file_worker( + patient_file_path = patient_file_path, + patient_file_name = patient_file_name, + output_root = output_root + ), + error = function(e) { + ParallelLogger::logError("Could not process raw patient data. Error = ", e$message, ".") + }, + warning = function(w) { + ParallelLogger::logWarn("Could not process raw patient data. 
Warning = ", w$message, ".") + } + ) + }, + output_root = paths$output_root + ) + + ParallelLogger::logInfo("Finish process_patient_file.") +} + + +process_patient_file_worker <- function(patient_file_path, patient_file_name, output_root) { + allowed_provinces <- get_allowed_provinces() + + df_patient_raw <- arrow::read_parquet(patient_file_path) + + # filter all rows with no patient id or patient name + df_patient_raw <- df_patient_raw %>% + dplyr::filter(!(is.na(id) & is.na(name))) %>% + dplyr::filter(!(id == "0" & name == "0")) + + # --- TRANSFORMATIONS --- + # data before 2019 had only one column for updated hba1c and fbg + # with date as part of the value + if (!"hba1c_updated_date" %in% colnames(df_patient_raw) && "hba1c_updated" %in% colnames(df_patient_raw)) { + ParallelLogger::logInfo("Column updated_hba1c_date not found. Trying to parse from hba1c_updated.") + df_patient_raw <- + extract_date_from_measurement(df_patient_raw, "hba1c_updated") + ParallelLogger::logDebug("Finished parsing dates from hba1c_updated.") + } + + if (!"fbg_updated_date" %in% colnames(df_patient_raw) && "fbg_updated_mg" %in% colnames(df_patient_raw)) { + ParallelLogger::logInfo("Column updated_fbg_date not found. Trying to parse from fbg_updated_mg.") + df_patient_raw <- + extract_date_from_measurement(df_patient_raw, "fbg_updated_mg") + ParallelLogger::logDebug("Finished parsing dates from fbg_updated_mg.") + } + + if (!"fbg_updated_date" %in% colnames(df_patient_raw) && "fbg_updated_mmol" %in% colnames(df_patient_raw)) { + ParallelLogger::logInfo("Column fbg_updated_date not found. Trying to parse from fbg_updated_mmol.") + df_patient_raw <- + extract_date_from_measurement(df_patient_raw, "fbg_updated_mmol") + ParallelLogger::logDebug("Finished parsing dates from fbg_updated_mmol.") + } + + # blood pressure is given as sys/dias value pair, + # so we split this column in two separate columns + if ("blood_pressure_mmhg" %in% colnames(df_patient_raw)) { + df_patient_raw <- split_bp_in_sys_and_dias(df_patient_raw) + } + + # The maximum value available for hba1c will be around 14% - 18%, + # depending on the equipment being used. + # If the reading is above the maximum available value the > sign is used - + # we would prefer to retain this character in the database as it is important for data analysis. 
+ ParallelLogger::logInfo("Adding columns hba1c_baseline_exceeds and hba1c_updated_exceeds.") + df_patient_raw <- df_patient_raw %>% + dplyr::mutate( + hba1c_baseline_exceeds = ifelse(grepl(">|<", hba1c_baseline), TRUE, FALSE), + hba1c_updated_exceeds = ifelse(grepl(">|<", hba1c_updated), TRUE, FALSE) + ) + + # --- META SCHEMA --- + # meta schema has all final columns for the database + # along with their corresponding data types + ParallelLogger::logInfo("Creating meta schema.") + # short type string for read_csv: + # iiinDccDcnnDnncnlnlDncDccDDDccccDccccciDciiiDn + schema <- tibble::tibble( + # clinic_visit = logical(), + # complication_screening = character(), + # complication_screening_date = lubridate::as_date(1), + # complication_screening_results = character(), + # dm_complication_comment = character(), # TODO + # dm_complication_eye = character(), # TODO + # dm_complication_kidney = character(), # TODO + # dm_complication_other = character(), # TODO + # est_strips_pmonth = integer(), + # family_support_scale = character(), # TODO + # inactive_reason = character(), + # insulin_dosage = character(), + # meter_received_date = lubridate::as_date(1), # TODO + # remarks = character(), + # remote_followup = logical(), + # additional_support = character(), + age = integer(), + blood_pressure_dias_mmhg = integer(), + blood_pressure_sys_mmhg = integer(), + bmi = numeric(), + bmi_date = lubridate::as_date(1), + clinic_code = character(), + country_code = character(), + dob = lubridate::as_date(1), + edu_occ = character(), + fbg_baseline_mg = numeric(), + fbg_baseline_mmol = numeric(), + fbg_updated_date = lubridate::as_date(1), + fbg_updated_mg = numeric(), + fbg_updated_mmol = numeric(), + file_name = character(), + hba1c_baseline = numeric(), + hba1c_baseline_exceeds = logical(), + hba1c_updated = numeric(), + hba1c_updated_exceeds = logical(), + hba1c_updated_date = lubridate::as_date(1), + height = numeric(), + hospitalisation_cause = character(), + hospitalisation_date = lubridate::as_date(1), + id = character(), + insulin_regimen = character(), + last_clinic_visit_date = lubridate::as_date(1), + last_remote_followup_date = lubridate::as_date(1), + lost_date = lubridate::as_date(1), + name = character(), + observations = character(), + observations_category = character(), + province = character(), + recruitment_date = lubridate::as_date(1), + sex = character(), + sheet_name = character(), + status = character(), + status_out = character(), + support_from_a4d = character(), + t1d_diagnosis_age = integer(), + t1d_diagnosis_date = lubridate::as_date(1), + t1d_diagnosis_with_dka = character(), + testing_frequency = integer(), + tracker_date = lubridate::as_date(1), + tracker_month = integer(), + tracker_year = integer(), + updated_2022_date = lubridate::as_date(1), + weight = numeric() + ) + + cols_extra <- colnames(df_patient_raw)[!colnames(df_patient_raw) %in% colnames(schema)] + ParallelLogger::logWarn("Extra columns in patient data: ", paste(cols_extra, collapse = ", ")) + + cols_missing <- + colnames(schema)[!colnames(schema) %in% colnames(df_patient_raw)] + ParallelLogger::logWarn("Missing columns in patient data: ", paste(cols_missing, collapse = ", ")) + + # add all columns of schema to df_patient_raw + # keep all rows, only append missing cols + ParallelLogger::logInfo("Merging df_patient with meta schema and selecting all columns of meta schema.") + df_patient <- merge.default(df_patient_raw, schema, all.x = T) + df_patient <- df_patient[colnames(schema)] + + # the cleaning, fixing 
and validating happens in three major steps: + # 1. make sure we fix any known problems in the raw character columns + df_patient <- + df_patient %>% + dplyr::rowwise() %>% + # 1. handle known problems before converting to target type + dplyr::mutate( + t1d_diagnosis_age = fix_t1d_diagnosis_age(t1d_diagnosis_age, id), + hba1c_baseline = stringr::str_replace(hba1c_baseline, "<|>", ""), + hba1c_updated = stringr::str_replace(hba1c_updated, "<|>", ""), + fbg_baseline_mg = fix_fbg(fbg_baseline_mg), + fbg_baseline_mmol = fix_fbg(fbg_baseline_mmol), + fbg_updated_mg = fix_fbg(fbg_updated_mg), + fbg_updated_mmol = fix_fbg(fbg_updated_mmol), + testing_frequency = fix_testing_frequency(testing_frequency) + ) %>% + # 2. convert the refined character columns into the target data type + dplyr::mutate( + dplyr::across( + schema %>% dplyr::select(tidyselect::where(is.numeric)) %>% names(), + \(x) convert_to(correct_decimal_sign(x), as.numeric, ERROR_VAL_NUMERIC, dplyr::cur_column(), id = id) + ), + dplyr::across( + schema %>% dplyr::select(tidyselect::where(is.logical)) %>% names(), + \(x) convert_to(x, as.logical, FALSE, dplyr::cur_column(), id = id) + ), + dplyr::across( + schema %>% dplyr::select(tidyselect::where(lubridate::is.Date)) %>% names(), + \(x) convert_to(fix_digit_date(x), parse_dates, as.Date(ERROR_VAL_DATE), dplyr::cur_column(), id = id) + ), + dplyr::across( + schema %>% dplyr::select(tidyselect::where(is.integer)) %>% names(), + \(x) convert_to(x, function(x) as.integer(round(as.double(x))), ERROR_VAL_NUMERIC, dplyr::cur_column(), id = id) + ) + ) %>% + # 3. fix remaining problems in the target data type + dplyr::mutate( + # height and weight are needed to calculate bmi + height = transform_cm_to_m(height) %>% + cut_numeric_value(min = 0, max = 2.3, col_name = "height"), + weight = cut_numeric_value(weight, min = 0, max = 200, col_name = "weight"), + bmi = fix_bmi(weight, height, id) %>% + cut_numeric_value(min = 4, max = 60, "bmi"), + age = fix_age(age, dob, tracker_year, tracker_month, id) %>% + cut_numeric_value(min = 0, max = 25, "age"), + sex = fix_sex(sex, id), + hba1c_baseline = cut_numeric_value(hba1c_baseline, min = 4, max = 18, "hba1c_baseline"), + # https://www.cleveland19.com/story/1425584/ohio-man-holds-world-record-of-highest-blood-sugar/ + fbg_baseline_mmol = cut_numeric_value(fbg_baseline_mmol, min = 0, max = 136.5, "fbg_baseline_mmol"), + # https://www.cleveland19.com/story/1425584/ohio-man-holds-world-record-of-highest-blood-sugar/ + fbg_updated_mmol = cut_numeric_value(fbg_updated_mmol, min = 0, max = 136.5, "fbg_updated_mmol"), + blood_pressure_sys_mmhg = cut_numeric_value(blood_pressure_sys_mmhg, min = 20, max = 250, "blood_pressure_sys_mmhg"), + blood_pressure_dias_mmhg = cut_numeric_value(blood_pressure_dias_mmhg, min = 20, max = 220, "blood_pressure_dias_mmhg"), + tracker_date = lubridate::ym(paste(tracker_year, tracker_month, sep = "-")), + !!!parse_character_cleaning_config(a4d:::config$cleaning), + # should be fixed last as other fix functions use id to log invalid rows! 
+            id = fix_id(id)
+        ) %>%
+        dplyr::ungroup()
+
+    # add clinic and country code after having fixed all issues with patient id
+    cc_codes <- extract_country_clinic_code(df_patient)
+    df_patient["clinic_code"] <- cc_codes$clinic_code
+    df_patient["country_code"] <- cc_codes$country_code
+
+    # Formula to calculate mmol/l from mg/dl: mmol/l = mg/dl / 18
+    if (all(is.na(df_patient$fbg_baseline_mmol))) {
+        df_patient <- df_patient %>%
+            dplyr::mutate(fbg_baseline_mmol = dplyr::case_when(
+                fbg_baseline_mg != ERROR_VAL_NUMERIC ~ fbg_baseline_mg / 18
+            ))
+    }
+    if (all(is.na(df_patient$fbg_updated_mmol))) {
+        df_patient <- df_patient %>%
+            dplyr::mutate(fbg_updated_mmol = dplyr::case_when(
+                fbg_updated_mg != ERROR_VAL_NUMERIC ~ fbg_updated_mg / 18
+            ))
+    }
+
+    # Formula to calculate mg/dl from mmol/l: mg/dl = 18 × mmol/l
+    if (all(is.na(df_patient$fbg_baseline_mg))) {
+        df_patient <- df_patient %>%
+            dplyr::mutate(fbg_baseline_mg = dplyr::case_when(
+                fbg_baseline_mmol != ERROR_VAL_NUMERIC ~ fbg_baseline_mmol * 18
+            ))
+    }
+    if (all(is.na(df_patient$fbg_updated_mg))) {
+        df_patient <- df_patient %>%
+            dplyr::mutate(fbg_updated_mg = dplyr::case_when(
+                fbg_updated_mmol != ERROR_VAL_NUMERIC ~ fbg_updated_mmol * 18
+            ))
+    }
+
+    # sort by year and month like it is in the tracker files
+    df_patient <- df_patient %>%
+        dplyr::arrange(tracker_date, id)
+
+    ParallelLogger::logDebug(
+        "df_patient dim: ",
+        dim(df_patient) %>% as.data.frame(),
+        "."
+    )
+
+    export_data_as_parquet(
+        data = df_patient,
+        filename = stringr::str_replace(patient_file_name, "_patient_raw", ""),
+        output_root = output_root,
+        suffix = "_patient_cleaned"
+    )
+}
+
+
+#' Process and clean product raw data
+#'
+#' @description
+#' Read the raw product data of one tracker (parquet output of the first script), clean it with reading_product_data_step2, and export it as a cleaned parquet file.
+#'
+#'
+#' @param product_file Path to product raw parquet file from first script.
+#' @param paths List with paths to patient_data_cleaned and product_data_cleaned folders.
+#' @param p progressor from progressr package.
+#'
+#' @export
+process_product_file <- function(product_file, paths, p) {
+    p()
+    synonyms <- get_synonyms()
+    synonyms_product <- synonyms$product
+    product_file_name <- tools::file_path_sans_ext(basename(product_file))
+    product_file_path <-
+        file.path(paths$tracker_root, product_file)
+
+    ParallelLogger::logDebug("Start process_product_file.")
+    ParallelLogger::logInfo(
+        "Current file: ",
+        product_file_name
+    )
+    df_product_raw <- arrow::read_parquet(product_file_path)
+
+    logfile <- paste0(product_file_name)
+    with_file_logger(logfile,
+        {
+            tryCatch(
+                df_product_raw <- reading_product_data_step2(df_product_raw, synonyms_product),
+                error = function(e) {
+                    ParallelLogger::logError("Could not process raw product data. Error = ", e$message, ".")
+                },
+                warning = function(w) {
+                    ParallelLogger::logWarn("Could not process raw product data. Warning = ", w$message, ".")
+                }
+            )
+
+            ParallelLogger::logDebug(
+                "df_product_raw dim: ",
+                dim(df_product_raw) %>% as.data.frame(),
+                "."
+ ) + + export_data_as_parquet( + data = df_product_raw, + filename = stringr::str_replace(product_file_name, "_product_raw", ""), + output_root = paths$product_data_cleaned, + suffix = "_product_cleaned" + ) + }, + output_root = paths$output_root + ) + + ParallelLogger::logInfo("Finish process_product_file.") +} diff --git a/R/link_product_patient.R b/R/link_product_patient.R index 119de01..8816d57 100644 --- a/R/link_product_patient.R +++ b/R/link_product_patient.R @@ -14,7 +14,7 @@ #' link_product_patient("path/to/product_data.parquet", "path/to/patient_data.parquet") #' } link_product_patient <- function(product_file, patient_file) { - logInfo("Trying to link product file ", product_file, " with patient file ", patient_file) + ParallelLogger::logInfo("Trying to link product file ", product_file, " with patient file ", patient_file) patient_data <- arrow::read_parquet(patient_file) product_data <- arrow::read_parquet(product_file) @@ -44,7 +44,7 @@ link_product_patient <- function(product_file, patient_file) { tryCatch( { if (nrow(summary_df) > 0) { - logWarn( + ParallelLogger::logWarn( "The number of mismatched patient IDs between the product and patient data is ", nrow(summary_df), ". ", paste("File Name: ", summary_df$file_name, @@ -56,18 +56,18 @@ link_product_patient <- function(product_file, patient_file) { } }, error = function(e) { - logError("Could not link csv files for product and patient data. Error: ", e$message) + ParallelLogger::logError("Could not link csv files for product and patient data. Error: ", e$message) }, warning = function(w) { - logWarn("Could not link csv files for product and patient data. Warning: ", w$message) + ParallelLogger::logWarn("Could not link csv files for product and patient data. Warning: ", w$message) } ) } else { - logInfo( + ParallelLogger::logInfo( "There are no mismatched patient IDs between the product data - ", product_file, " and patient data - ", patient_file ) } - logInfo("Finished attempting to link product csv file with patient csv file.") + ParallelLogger::logInfo("Finished attempting to link product csv file with patient csv file.") } diff --git a/R/logger.R b/R/logger.R index 57958bd..e2a111b 100644 --- a/R/logger.R +++ b/R/logger.R @@ -9,17 +9,17 @@ setup_logger <- function(output_dir, log_name) { file.remove(logFileName) } - logger <- createLogger( + logger <- ParallelLogger::createLogger( name = "MAIN", threshold = "TRACE", appenders = list( - createFileAppender( - layout = layoutParallel, + ParallelLogger::createFileAppender( + layout = ParallelLogger::layoutParallel, fileName = logFileName ) ) ) - registerLogger(logger) + ParallelLogger::registerLogger(logger) log_dir <- file.path(output_dir, "logs") @@ -33,14 +33,15 @@ setup_logger <- function(output_dir, log_name) { #' @param output_root Output root directory for the current process. 
#' #' @return returns the loggers that where previously set for usage with with_ +#' @export setup_file_logger <- function(logfile, output_root) { - loggers <- getLoggers() - clearLoggers() + loggers <- ParallelLogger::getLoggers() + ParallelLogger::clearLoggers() logFileName <- file.path(output_root, "logs", paste0(logfile, ".log")) if (file.exists(logFileName)) { file.remove(logFileName) } - addDefaultFileLogger(logFileName, name = logfile) + ParallelLogger::addDefaultFileLogger(logFileName, name = logfile) loggers } @@ -58,9 +59,9 @@ setup_file_logger <- function(logfile, output_root) { with_file_logger <- withr::with_( setup_file_logger, function(loggers) { - clearLoggers() + ParallelLogger::clearLoggers() for (logger in loggers) { - registerLogger(logger) + ParallelLogger::registerLogger(logger) } } ) diff --git a/R/read_cleaned_patient_data.R b/R/read_cleaned_patient_data.R index ae56e34..b217da4 100644 --- a/R/read_cleaned_patient_data.R +++ b/R/read_cleaned_patient_data.R @@ -7,7 +7,7 @@ #' @export read_cleaned_patient_data <- function(input_root, patient_data_files) { - logInfo("Start read_cleaned_patient_data") + ParallelLogger::logInfo("Start read_cleaned_patient_data") patient_data <- patient_data_files %>% purrr::map(function(patient_file) { @@ -17,6 +17,6 @@ read_cleaned_patient_data <- - logInfo("Finish read_cleaned_patient_data") + ParallelLogger::logInfo("Finish read_cleaned_patient_data") patient_data } diff --git a/R/read_patient_data.R b/R/read_patient_data.R index b292f1c..61f6839 100644 --- a/R/read_patient_data.R +++ b/R/read_patient_data.R @@ -1,9 +1,9 @@ reading_patient_data <- function(tracker_data_file, columns_synonyms) { - logDebug("Start reading_patient_data.") + ParallelLogger::logDebug("Start reading_patient_data.") sheet_list <- readxl::excel_sheets(tracker_data_file) testit::assert(length(sheet_list) > 0) - logInfo( + ParallelLogger::logInfo( "Found ", length(sheet_list), " sheets inside the current file = ", @@ -13,7 +13,7 @@ reading_patient_data <- month_list <- sheet_list[na.omit(pmatch(month.abb, sheet_list))] - logInfo( + ParallelLogger::logInfo( "Found ", length(month_list), " month sheets inside the current file = ", @@ -24,18 +24,18 @@ reading_patient_data <- # Extract year year <- get_tracker_year(tracker_data_file, month_list) - logInfo("Tracker year = ", year, ".") + ParallelLogger::logInfo("Tracker year = ", year, ".") testit::assert(year %in% c(2017, 2018, 2019, 2020, 2021, 2022)) tidy_tracker_list <- NULL - logDebug("Start processing sheets.") + ParallelLogger::logDebug("Start processing sheets.") for (curr_sheet in month_list) { - logDebug("Start processing sheet ", curr_sheet, ".") + ParallelLogger::logDebug("Start processing sheet ", curr_sheet, ".") df_patient <- extract_patient_data(tracker_data_file, curr_sheet, year) testit::assert(nrow(df_patient) > 0) - logDebug("df_patient dim: ", dim(df_patient) %>% as.data.frame(), ".") + ParallelLogger::logDebug("df_patient dim: ", dim(df_patient) %>% as.data.frame(), ".") df_patient <- harmonize_patient_data_columns(df_patient, columns_synonyms) @@ -66,15 +66,15 @@ reading_patient_data <- ) tidy_tracker_list[[curr_sheet]] <- df_patient - logDebug("Finish processing sheet ", curr_sheet, ".") + ParallelLogger::logDebug("Finish processing sheet ", curr_sheet, ".") } - logDebug("Start combining sheet data into single data frame.") + ParallelLogger::logDebug("Start combining sheet data into single data frame.") df_raw <- dplyr::bind_rows(tidy_tracker_list) - logDebug("Finish combining sheet data into 
single data frame.") + ParallelLogger::logDebug("Finish combining sheet data into single data frame.") if ("Patient List" %in% sheet_list) { - logDebug("Start extracting patient list.") + ParallelLogger::logDebug("Start extracting patient list.") patient_list <- extract_patient_data( tracker_data_file, "Patient List", @@ -98,9 +98,9 @@ reading_patient_data <- by = "id", relationship = "many-to-one" ) - logDebug("Finish extracting patient list.") + ParallelLogger::logDebug("Finish extracting patient list.") } - logInfo("Finish reading_patient_data.") + ParallelLogger::logInfo("Finish reading_patient_data.") return(df_raw) } diff --git a/R/read_product_data.R b/R/read_product_data.R index 10d8886..d1fc051 100644 --- a/R/read_product_data.R +++ b/R/read_product_data.R @@ -4,7 +4,7 @@ # function based on parts from run_a4d_product_data.R and helper functions reading_product_data_step1 <- function(tracker_data_file, columns_synonyms) { - logDebug("Start reading_product_data_step1.") + ParallelLogger::logDebug("Start reading_product_data_step1.") # rename column names to match colnames(columns_synonyms) <- c("name_clean", "name_to_be_matched") @@ -13,13 +13,13 @@ reading_product_data_step1 <- month_list <- sheet_list[na.omit(pmatch(month.abb, sheet_list))] year <- get_tracker_year(tracker_data_file, month_list) - logDebug("Found ", length(sheet_list), " sheets: ", paste(sheet_list, collapse = ", "), ".") - logDebug("Found ", length(month_list), " month sheets: ", paste(month_list, collapse = ", "), ".") + ParallelLogger::logDebug("Found ", length(sheet_list), " sheets: ", paste(sheet_list, collapse = ", "), ".") + ParallelLogger::logDebug("Found ", length(month_list), " month sheets: ", paste(month_list, collapse = ", "), ".") # loop through all months for (curr_sheet in month_list) { - logDebug("Start processing the following sheet: ", curr_sheet) + ParallelLogger::logDebug("Start processing the following sheet: ", curr_sheet) # open tracker data tracker_data <- data.frame( @@ -37,7 +37,7 @@ reading_product_data_step1 <- grepl("Description of Support", tracker_data[, ]))) ) { # go to next month - logInfo("Could not find product data in tracker data. Skipping ", curr_sheet, ".") + ParallelLogger::logInfo("Could not find product data in tracker data. Skipping ", curr_sheet, ".") next } @@ -46,7 +46,7 @@ reading_product_data_step1 <- # If after extraction, dataframe is empty, this iteration is also skipped. if (all(is.na(product_df))) { - logInfo("Product data is empty. Skipping ", curr_sheet, ".") + ParallelLogger::logInfo("Product data is empty. Skipping ", curr_sheet, ".") next } @@ -69,11 +69,11 @@ reading_product_data_step1 <- tryCatch( { if (num_na_rows > 0) { - logInfo(curr_sheet, " the number of rows where the patient's name is missing: ", col_released, " is not NA and ", col_released_to, " (patient's name) is NA = ", num_na_rows) + ParallelLogger::logInfo(curr_sheet, " the number of rows where the patient's name is missing: ", col_released, " is not NA and ", col_released_to, " (patient's name) is NA = ", num_na_rows) } }, error = function(e) { - logError(curr_sheet, " trying with num_na_rows for products. Error: ", e$message) + ParallelLogger::logError(curr_sheet, " trying with num_na_rows for products. 
Error: ", e$message) } ) @@ -83,7 +83,7 @@ reading_product_data_step1 <- tryCatch( { if (nrow(non_processed_dates) > 0) { - logWarn( + ParallelLogger::logWarn( curr_sheet, " the number of rows with non-processed dates in product_entry_date is ", nrow(non_processed_dates), ": ", @@ -92,7 +92,7 @@ reading_product_data_step1 <- } }, error = function(e) { - logError(curr_sheet, " trying with non_processed_dates in product_entry_date. Error: ", e$message) + ParallelLogger::logError(curr_sheet, " trying with non_processed_dates in product_entry_date. Error: ", e$message) } ) @@ -107,7 +107,7 @@ reading_product_data_step1 <- # Check if the entry dates for the balance match the month/year on the sheet check_entry_dates(product_df, curr_sheet) - logDebug("Finish processing sheet: ", curr_sheet) + ParallelLogger::logDebug("Finish processing sheet: ", curr_sheet) # combine all months if (!exists("df_final")) { @@ -121,7 +121,7 @@ reading_product_data_step1 <- } else { return(NULL) } - logDebug("Finish reading_product_data_step1.") + ParallelLogger::logDebug("Finish reading_product_data_step1.") } @@ -151,7 +151,7 @@ count_na_rows <- function(df, units_released_col, released_to_col) { #' #' @return This function does not return a value. It logs a warning message if there are any dates in 'product_entry_date' that don't match the month/year on the sheet. check_entry_dates <- function(df, Sheet) { - logDebug("Start check_entry_dates.") + ParallelLogger::logDebug("Start check_entry_dates.") # Check if the entry dates for the balance match the month/year on the sheet entry_dates_df <- df %>% dplyr::filter(grepl("^[0-9]+$", product_entry_date)) @@ -167,14 +167,14 @@ check_entry_dates <- function(df, Sheet) { not_same <- entry_dates_df[entry_dates_df$ed_month != entry_dates_df$product_table_month | entry_dates_df$ed_year != entry_dates_df$product_table_year, ] if (nrow(not_same) > 0) { - logWarn( + ParallelLogger::logWarn( Sheet, " the number of dates in product_entry_date that don't match the month/year on the sheet is ", nrow(not_same), ": ", paste(not_same$ed_date, collapse = ", ") ) } - logDebug("Finish check_entry_dates.") + ParallelLogger::logDebug("Finish check_entry_dates.") } #' @title Remove Rows with NA Values in Specified Columns. 
@@ -192,7 +192,7 @@ remove_rows_with_na_columns <- na_rows <- apply(df[column_names], 1, function(x) all(is.na(x))) # log message - logInfo(paste(length(na_rows[na_rows == T]), " rows deleted out of ", nrow(df), " rows (reason: rows not containing additional info).", sep = "")) + ParallelLogger::logInfo(paste(length(na_rows[na_rows == T]), " rows deleted out of ", nrow(df), " rows (reason: rows not containing additional info).", sep = "")) # Return the data frame without the NA rows return(df[!na_rows, ]) @@ -215,7 +215,7 @@ check_negative_balance <- function(df, Sheet) { # Check if there are any rows in the new data frame if (nrow(negative_df) > 0) { # Log a warning message with the number of negative values and their corresponding product_balance values - logWarn( + ParallelLogger::logWarn( Sheet, " number of negative values in product_balance on the sheet is ", nrow(negative_df), ": ", @@ -242,7 +242,7 @@ switch_columns_stock <- "product_units_received" = "product_received_from", "product_received_from" = "product_units_received" ) - logDebug("Columns product_units_received and product_received_from were switched") + ParallelLogger::logDebug("Columns product_units_received and product_received_from were switched") return(df) } else { return(df) @@ -290,14 +290,14 @@ report_unknown_products <- function(df, Sheet, stock_list_df) { # Check if there are any unknown products names if (length(unmatched_products) > 0) { # Log a warning message with the number of unknown products names - logWarn( + ParallelLogger::logWarn( Sheet, " the number of unknown product names on the sheet is ", length(unmatched_products), ": ", paste(unmatched_products, collapse = ", ") ) } else { - logInfo(Sheet, " no unknown product names on the sheet") + ParallelLogger::logInfo(Sheet, " no unknown product names on the sheet") } } @@ -317,16 +317,16 @@ report_unknown_products <- function(df, Sheet, stock_list_df) { #' product_list <- load_product_reference_data("your_file.xlsx") #' } load_product_reference_data <- function(stock_summary_xlsx = "reference_data/master_tracker_variables.xlsx") { - logDebug("Trying to load the product list from the Stock Summary, ", stock_summary_xlsx, "...") + ParallelLogger::logDebug("Trying to load the product list from the Stock Summary, ", stock_summary_xlsx, "...") tryCatch( { product_names_df <- readxl::read_excel(stock_summary_xlsx, "Stock_Summary") colnames(product_names_df) <- tolower(colnames(product_names_df)) - logDebug(nrow(product_names_df), " product names were loaded from the Stock Summary.") + ParallelLogger::logDebug(nrow(product_names_df), " product names were loaded from the Stock Summary.") return(product_names_df) }, error = function(e) { - logError("Error in loading stock product list: ", e) + ParallelLogger::logError("Error in loading stock product list: ", e) } ) } @@ -371,7 +371,7 @@ add_product_categories <- function(inventory_data, product_category_mapping) { #' df <- extract_unit_capacity(df, "product") #' } extract_unit_capacity <- function(df, column_name) { - logDebug("Trying to extract Unit Capacity from ", column_name, " column") + ParallelLogger::logDebug("Trying to extract Unit Capacity from ", column_name, " column") # Extract all symbols between parentheses df$product_unit_capacity <- stringr::str_extract(df[[column_name]], "\\(([^)]+)\\)") @@ -393,7 +393,7 @@ extract_unit_capacity <- function(df, column_name) { # Add 1 to NA values df$product_unit_capacity[is.na(df$product_unit_capacity)] <- 1 - logDebug("Finished extracting Unit Capacity from ", 
column_name, " column") + ParallelLogger::logDebug("Finished extracting Unit Capacity from ", column_name, " column") return(df) } @@ -410,7 +410,7 @@ extract_unit_capacity <- function(df, column_name) { #' @return Cleaned product data for one specified tracker. reading_product_data_step2 <- function(df, columns_synonyms) { - logDebug("Start reading_product_data_step2.") + ParallelLogger::logDebug("Start reading_product_data_step2.") # rename column names to match colnames(columns_synonyms) <- c("name_clean", "name_to_be_matched") @@ -425,7 +425,7 @@ reading_product_data_step2 <- # loop through all months for (sheet_month in unique(df$product_sheet_name)) { - logDebug(paste("Start processing the following sheet:", sheet_month)) + ParallelLogger::logDebug(paste("Start processing the following sheet:", sheet_month)) # filter on month sheet product_df <- df %>% @@ -453,7 +453,7 @@ reading_product_data_step2 <- product_df <- remove_rows_with_na_columns(product_df, column_names_check) # jump to next sheet if dataframe empty from here if (nrow(product_df) == 0) { - logDebug(paste(sheet_month, " sheet is empty after filtering and skipped", sep = "")) + ParallelLogger::logDebug(paste(sheet_month, " sheet is empty after filtering and skipped", sep = "")) next } @@ -527,12 +527,12 @@ reading_product_data_step2 <- df_final <- df_final %>% rbind(product_df) - logDebug(paste("Finished processing the following sheet:", sheet_month)) + ParallelLogger::logDebug(paste("Finished processing the following sheet:", sheet_month)) } if (nrow(df_final) > 0) { return(df_final) } else { - logDebug(paste("No product data extracted for the following tracker:", df$file_name[1])) + ParallelLogger::logDebug(paste("No product data extracted for the following tracker:", df$file_name[1])) } } diff --git a/R/sysdata.rda b/R/sysdata.rda index eb303cf804b77f1badf0b2d97168f0a3f6920e66..be6b002cff04d1329d0fdca86a74da91bb6a0978 100644 GIT binary patch delta 19 XcmX>tdRmlAzMF#q445}^9pnN4Eu91g delta 19 XcmX>tdRmlAzMF#q4A?eu9pnN4Evy6y diff --git a/man/process_patient_file.Rd b/man/process_patient_file.Rd new file mode 100644 index 0000000..69c9a62 --- /dev/null +++ b/man/process_patient_file.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_script_2.R +\name{process_patient_file} +\alias{process_patient_file} +\title{Process and clean patient raw data} +\usage{ +process_patient_file(patient_file, paths, p) +} +\arguments{ +\item{patient_file}{Path to patient raw parquet file from first script.} + +\item{paths}{List with paths to patient_data_cleaned and product_data_cleaned folders.} + +\item{p}{progressor from progressr package.} +} +\description{ +Read in the output of the first script as parquet (all character). +Merge extracted patient data with the metadata schema that contains all +necessary columns. Process each variable in three steps: +\enumerate{ +\item Fix any problems with character representation. +\item Convert column to target data type, for example a date or number. +\item Apply any post-processing logic for the target data type. 
+}
+}
diff --git a/man/process_product_file.Rd b/man/process_product_file.Rd
new file mode 100644
index 0000000..07d01d9
--- /dev/null
+++ b/man/process_product_file.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/helper_script_2.R
+\name{process_product_file}
+\alias{process_product_file}
+\title{Process and clean product raw data}
+\usage{
+process_product_file(product_file, paths, p)
+}
+\arguments{
+\item{product_file}{Path to product raw parquet file from first script.}
+
+\item{paths}{List with paths to patient_data_cleaned and product_data_cleaned folders.}
+
+\item{p}{progressor from progressr package.}
+}
+\description{
+Read the raw product data of one tracker, clean it with reading_product_data_step2, and export it as a cleaned parquet file.
+}
diff --git a/man/process_tracker_file.Rd b/man/process_tracker_file.Rd
index b9728ae..2453d08 100644
--- a/man/process_tracker_file.Rd
+++ b/man/process_tracker_file.Rd
@@ -4,14 +4,14 @@
 \alias{process_tracker_file}
 \title{Process a single tracker file and extract patient and product data.}
 \usage{
-process_tracker_file(paths, tracker_file, tracker_name)
+process_tracker_file(tracker_file, paths, p)
 }
 \arguments{
-\item{paths}{a list with the paths to the tracker root dir, the patient and product output dir and the root output dir.}
+\item{tracker_file}{Filename of the tracker.}
 
-\item{tracker_file}{filename.}
+\item{paths}{a list with the paths to the tracker root dir, the patient and product output dir and the root output dir.}
 
-\item{tracker_name}{filename without extension.}
+\item{p}{progressor from progressr package.}
 }
 \description{
 Process a single tracker file and extract patient and product data.
diff --git a/reference_data/build_package_data.R b/reference_data/build_package_data.R
index 3d34392..7d3dc8c 100644
--- a/reference_data/build_package_data.R
+++ b/reference_data/build_package_data.R
@@ -1,5 +1,5 @@
 cleaning_config <- yaml::read_yaml(here::here("reference_data", "data_cleaning.yaml"))
-allowed_provinces <- yaml::read_yaml(here::here("reference_data", "provinces", "allowed_provinces.yaml")) %>% unlist()
+allowed_provinces <- yaml::read_yaml(here::here("reference_data", "provinces", "allowed_provinces.yaml")) |> unlist()
 
 for (i in length(cleaning_config$province$steps)) {
     if (cleaning_config$province$steps[[i]]$type == "allowed_values") {
diff --git a/scripts/run_script_1_extract_raw_data.R b/scripts/run_script_1_extract_raw_data.R
index d4961ca..7fe3684 100644
--- a/scripts/run_script_1_extract_raw_data.R
+++ b/scripts/run_script_1_extract_raw_data.R
@@ -1,11 +1,11 @@
 options(readxl.show_progress = FALSE)
-future::plan(future::multisession, workers = 4)
+future::plan(future::multisession)
 
-Sys.setenv(A4D_DATA_ROOT = "data")
 paths <- a4d::init_paths(c("patient_data_raw", "product_data_raw"), delete = TRUE)
-setup_logger(paths$output_root, "script1")
+a4d::setup_logger(paths$output_root, "script1")
 tracker_files <- a4d::get_files(paths$tracker_root)
-logInfo(
+
+ParallelLogger::logInfo(
     "Found ",
     length(tracker_files),
     " xlsx files under ",
@@ -13,7 +13,7 @@
     "."
) -logInfo("Start processing tracker files.") +ParallelLogger::logInfo("Start processing tracker files.") progressr::with_progress({ p <- progressr::progressor(steps = length(tracker_files)) @@ -21,11 +21,11 @@ progressr::with_progress({ result <- furrr::future_map( tracker_files, a4d::process_tracker_file, - paths=paths, - p=p + paths = paths, + p = p ) }) -logInfo("Finish processing all tracker files.") +ParallelLogger::logInfo("Finish processing all tracker files.") -clearLoggers() +ParallelLogger::clearLoggers() diff --git a/scripts/run_script_2_clean_data.R b/scripts/run_script_2_clean_data.R index e20727a..21986c9 100644 --- a/scripts/run_script_2_clean_data.R +++ b/scripts/run_script_2_clean_data.R @@ -1,371 +1,53 @@ options(readxl.show_progress = FALSE) - -ERROR_VAL_NUMERIC <<- 999999 -ERROR_VAL_CHARACTER <<- "Undefined" -ERROR_VAL_DATE <<- "9999-09-09" - -main <- function() { - paths <- init_paths(c("patient_data_cleaned", "product_data_cleaned"), delete = TRUE) - setup_logger(paths$output_root, "script2") - patient_data_files <- get_files(paths$tracker_root, pattern = "patient_raw.parquet$") - product_data_files <- get_files(paths$tracker_root, pattern = "product_raw.parquet$") - logInfo( - "Found ", - length(patient_data_files), - " patient csv files under ", - paths$tracker_root, - "." - ) - logInfo( - "Found ", - length(product_data_files), - " product csv files under ", - paths$tracker_root, - "." - ) - - logInfo("Start processing patient csv files.") - - for (i in seq_along(patient_data_files)) { - patient_file <- patient_data_files[i] - patient_file_name <- tools::file_path_sans_ext(basename(patient_file)) - logfile <- paste0(patient_file_name) - with_file_logger(logfile, - { - tryCatch( - process_patient_file(paths, patient_file, patient_file_name, paths$patient_data_cleaned), - error = function(e) { - logError("Could not process raw patient data. Error = ", e$message, ".") - }, - warning = function(w) { - logWarn("Could not process raw patient data. Warning = ", w$message, ".") - } - ) - }, - output_root = paths$output_root - ) - cat(paste("Processed ", i, " of ", length(patient_data_files), " (", round(i / length(patient_data_files) * 100, 0), "%) patient files.\n")) - } - - logInfo("Finish processing all patient csv files.") - - logDebug("Start processing product csv files.") - synonyms <- get_synonyms() - synonyms_product <- synonyms$product - - for (i in seq_along(product_data_files)) { - product_file <- product_data_files[i] - product_file_name <- tools::file_path_sans_ext(basename(product_file)) - logfile <- paste0(product_file_name) - - with_file_logger(logfile, - { - tryCatch( - process_product_file(paths, product_file, product_file_name, synonyms_product, paths$product_data_cleaned), - error = function(e) { - logError("Could not process raw product data. Error = ", e$message, ".") - }, - warning = function(w) { - logWarn("Could not process raw product data. 
Warning = ", w$message, ".") - } - ) - }, - output_root = paths$output_root - ) - cat(paste("Processed ", i, " of ", length(product_data_files), " (", round(i / length(product_data_files) * 100, 0), "%) product files.\n")) - } - - logInfo("Finish processing all csv files.") -} - - -process_patient_file <- function(paths, patient_file, patient_file_name, output_root) { - patient_file_path <- - file.path(paths$tracker_root, patient_file) - logDebug("Start process_patient_file.") - logInfo( - "Current file: ", - patient_file_name - ) - - allowed_provinces <- get_allowed_provinces() - - df_patient_raw <- arrow::read_parquet(patient_file_path) - - # filter all rows with no patient id or patient name - df_patient_raw <- df_patient_raw %>% - dplyr::filter(!(is.na(id) & is.na(name))) %>% - dplyr::filter(!(id == "0" & name == "0")) - - # --- TRANSFORMATIONS --- - # data before 2019 had only one column for updated hba1c and fbg - # with date as part of the value - if (!"hba1c_updated_date" %in% colnames(df_patient_raw) && "hba1c_updated" %in% colnames(df_patient_raw)) { - logInfo("Column updated_hba1c_date not found. Trying to parse from hba1c_updated.") - df_patient_raw <- - extract_date_from_measurement(df_patient_raw, "hba1c_updated") - logDebug("Finished parsing dates from hba1c_updated.") - } - - if (!"fbg_updated_date" %in% colnames(df_patient_raw) && "fbg_updated_mg" %in% colnames(df_patient_raw)) { - logInfo("Column updated_fbg_date not found. Trying to parse from fbg_updated_mg.") - df_patient_raw <- - extract_date_from_measurement(df_patient_raw, "fbg_updated_mg") - logDebug("Finished parsing dates from fbg_updated_mg.") - } - - if (!"fbg_updated_date" %in% colnames(df_patient_raw) && "fbg_updated_mmol" %in% colnames(df_patient_raw)) { - logInfo("Column fbg_updated_date not found. Trying to parse from fbg_updated_mmol.") - df_patient_raw <- - extract_date_from_measurement(df_patient_raw, "fbg_updated_mmol") - logDebug("Finished parsing dates from fbg_updated_mmol.") - } - - # blood pressure is given as sys/dias value pair, - # so we split this column in two separate columns - if ("blood_pressure_mmhg" %in% colnames(df_patient_raw)) { - df_patient_raw <- split_bp_in_sys_and_dias(df_patient_raw) - } - - # The maximum value available for hba1c will be around 14% - 18%, - # depending on the equipment being used. - # If the reading is above the maximum available value the > sign is used - - # we would prefer to retain this character in the database as it is important for data analysis. 
- logInfo("Adding columns hba1c_baseline_exceeds and hba1c_updated_exceeds.") - df_patient_raw <- df_patient_raw %>% - dplyr::mutate( - hba1c_baseline_exceeds = ifelse(grepl(">|<", hba1c_baseline), TRUE, FALSE), - hba1c_updated_exceeds = ifelse(grepl(">|<", hba1c_updated), TRUE, FALSE) - ) - - # --- META SCHEMA --- - # meta schema has all final columns for the database - # along with their corresponding data types - logInfo("Creating meta schema.") - # short type string for read_csv: - # iiinDccDcnnDnncnlnlDncDccDDDccccDccccciDciiiDn - schema <- tibble::tibble( - # clinic_visit = logical(), - # complication_screening = character(), - # complication_screening_date = lubridate::as_date(1), - # complication_screening_results = character(), - # dm_complication_comment = character(), # TODO - # dm_complication_eye = character(), # TODO - # dm_complication_kidney = character(), # TODO - # dm_complication_other = character(), # TODO - # est_strips_pmonth = integer(), - # family_support_scale = character(), # TODO - # inactive_reason = character(), - # insulin_dosage = character(), - # meter_received_date = lubridate::as_date(1), # TODO - # remarks = character(), - # remote_followup = logical(), - # additional_support = character(), - age = integer(), - blood_pressure_dias_mmhg = integer(), - blood_pressure_sys_mmhg = integer(), - bmi = numeric(), - bmi_date = lubridate::as_date(1), - clinic_code = character(), - country_code = character(), - dob = lubridate::as_date(1), - edu_occ = character(), - fbg_baseline_mg = numeric(), - fbg_baseline_mmol = numeric(), - fbg_updated_date = lubridate::as_date(1), - fbg_updated_mg = numeric(), - fbg_updated_mmol = numeric(), - file_name = character(), - hba1c_baseline = numeric(), - hba1c_baseline_exceeds = logical(), - hba1c_updated = numeric(), - hba1c_updated_exceeds = logical(), - hba1c_updated_date = lubridate::as_date(1), - height = numeric(), - hospitalisation_cause = character(), - hospitalisation_date = lubridate::as_date(1), - id = character(), - insulin_regimen = character(), - last_clinic_visit_date = lubridate::as_date(1), - last_remote_followup_date = lubridate::as_date(1), - lost_date = lubridate::as_date(1), - name = character(), - observations = character(), - observations_category = character(), - province = character(), - recruitment_date = lubridate::as_date(1), - sex = character(), - sheet_name = character(), - status = character(), - status_out = character(), - support_from_a4d = character(), - t1d_diagnosis_age = integer(), - t1d_diagnosis_date = lubridate::as_date(1), - t1d_diagnosis_with_dka = character(), - testing_frequency = integer(), - tracker_date = lubridate::as_date(1), - tracker_month = integer(), - tracker_year = integer(), - updated_2022_date = lubridate::as_date(1), - weight = numeric() - ) - - cols_extra <- colnames(df_patient_raw)[!colnames(df_patient_raw) %in% colnames(schema)] - logWarn("Extra columns in patient data: ", paste(cols_extra, collapse = ", ")) - - cols_missing <- - colnames(schema)[!colnames(schema) %in% colnames(df_patient_raw)] - logWarn("Missing columns in patient data: ", paste(cols_missing, collapse = ", ")) - - # add all columns of schema to df_patient_raw - # keep all rows, only append missing cols - logInfo("Merging df_patient with meta schema and selecting all columns of meta schema.") - df_patient <- merge.default(df_patient_raw, schema, all.x = T) - df_patient <- df_patient[colnames(schema)] - - # the cleaning, fixing and validating happens in three major steps: - # 1. 
make sure we fix any known problems in the raw character columns - df_patient <- - df_patient %>% - dplyr::rowwise() %>% - # 1. handle known problems before converting to target type - dplyr::mutate( - t1d_diagnosis_age = fix_t1d_diagnosis_age(t1d_diagnosis_age, id), - hba1c_baseline = stringr::str_replace(hba1c_baseline, "<|>", ""), - hba1c_updated = stringr::str_replace(hba1c_updated, "<|>", ""), - fbg_baseline_mg = fix_fbg(fbg_baseline_mg), - fbg_baseline_mmol = fix_fbg(fbg_baseline_mmol), - fbg_updated_mg = fix_fbg(fbg_updated_mg), - fbg_updated_mmol = fix_fbg(fbg_updated_mmol), - testing_frequency = fix_testing_frequency(testing_frequency) - ) %>% - # 2. convert the refined character columns into the target data type - dplyr::mutate( - dplyr::across( - schema %>% dplyr::select(tidyselect::where(is.numeric)) %>% names(), - \(x) convert_to(correct_decimal_sign(x), as.numeric, ERROR_VAL_NUMERIC, dplyr::cur_column(), id = id) - ), - dplyr::across( - schema %>% dplyr::select(tidyselect::where(is.logical)) %>% names(), - \(x) convert_to(x, as.logical, FALSE, dplyr::cur_column(), id = id) - ), - dplyr::across( - schema %>% dplyr::select(tidyselect::where(lubridate::is.Date)) %>% names(), - \(x) convert_to(fix_digit_date(x), parse_dates, as.Date(ERROR_VAL_DATE), dplyr::cur_column(), id = id) - ), - dplyr::across( - schema %>% dplyr::select(tidyselect::where(is.integer)) %>% names(), - \(x) convert_to(x, function(x) as.integer(round(as.double(x))), ERROR_VAL_NUMERIC, dplyr::cur_column(), id = id) - ) - ) %>% - # 3. fix remaining problems in the target data type - dplyr::mutate( - # height and weight are needed to calculate bmi - height = transform_cm_to_m(height) %>% - cut_numeric_value(min = 0, max = 2.3, col_name = "height"), - weight = cut_numeric_value(weight, min = 0, max = 200, col_name = "weight"), - bmi = fix_bmi(weight, height, id) %>% - cut_numeric_value(min = 4, max = 60, "bmi"), - age = fix_age(age, dob, tracker_year, tracker_month, id) %>% - cut_numeric_value(min = 0, max = 25, "age"), - sex = fix_sex(sex, id), - hba1c_baseline = cut_numeric_value(hba1c_baseline, min = 4, max = 18, "hba1c_baseline"), - # https://www.cleveland19.com/story/1425584/ohio-man-holds-world-record-of-highest-blood-sugar/ - fbg_baseline_mmol = cut_numeric_value(fbg_baseline_mmol, min = 0, max = 136.5, "fbg_baseline_mmol"), - # https://www.cleveland19.com/story/1425584/ohio-man-holds-world-record-of-highest-blood-sugar/ - fbg_updated_mmol = cut_numeric_value(fbg_updated_mmol, min = 0, max = 136.5, "fbg_updated_mmol"), - blood_pressure_sys_mmhg = cut_numeric_value(blood_pressure_sys_mmhg, min = 20, max = 250, "blood_pressure_sys_mmhg"), - blood_pressure_dias_mmhg = cut_numeric_value(blood_pressure_dias_mmhg, min = 20, max = 220, "blood_pressure_dias_mmhg"), - tracker_date = lubridate::ym(paste(tracker_year, tracker_month, sep="-")), - !!!parse_character_cleaning_config(a4d:::config$cleaning), - # should be fixed last as other fix functions use id to log invalid rows! 
- id = fix_id(id) - ) %>% - dplyr::ungroup() - - # add clinic and country code after having fixed all issues with patient id - cc_codes <- extract_country_clinic_code(df_patient) - df_patient["clinic_code"] <- cc_codes$clinic_code - df_patient["country_code"] <- cc_codes$country_code - - # Formula to calculate mmol/l from mg/dl: mmol/l = mg/dl / 18 - if (all(is.na(df_patient$fbg_baseline_mmol))) { - df_patient <- df_patient %>% - dplyr::mutate(fbg_baseline_mmol = dplyr::case_when( - fbg_baseline_mg != ERROR_VAL_NUMERIC ~ fbg_baseline_mg / 18 - )) - } - if (all(is.na(df_patient$fbg_updated_mmol))) { - df_patient <- df_patient %>% - dplyr::mutate(fbg_updated_mmol = dplyr::case_when( - fbg_updated_mg != ERROR_VAL_NUMERIC ~ fbg_updated_mg / 18 - )) - } - - # Formula to calculate mg/dl from mmol/l: mg/dl = 18 × mmol/l - if (all(is.na(df_patient$fbg_baseline_mg))) { - df_patient <- df_patient %>% - dplyr::mutate(fbg_baseline_mg = dplyr::case_when( - fbg_baseline_mmol != ERROR_VAL_NUMERIC ~ fbg_baseline_mmol * 18 - )) - } - if (all(is.na(df_patient$fbg_updated_mg))) { - df_patient <- df_patient %>% - dplyr::mutate(fbg_updated_mg = dplyr::case_when( - fbg_updated_mmol != ERROR_VAL_NUMERIC ~ fbg_updated_mmol * 18 - )) - } - - # sort by year and month like it is in the tracker files - df_patient <- df_patient %>% - dplyr::arrange(tracker_date, id) - - logDebug( - "df_patient dim: ", - dim(df_patient) %>% as.data.frame(), - "." - ) - - export_data_as_parquet( - data = df_patient, - filename = stringr::str_replace(patient_file_name, "_patient_raw", ""), - output_root = output_root, - suffix = "_patient_cleaned" - ) - - logInfo("Finish process_patient_file.") -} - - -process_product_file <- function(paths, product_file, product_file_name, synonyms_product, output_root) { - product_file_path <- - file.path(paths$tracker_root, product_file) - logDebug("Start process_product_file.") - logInfo( - "Current file: ", - product_file_name - ) - - df_product_raw <- arrow::read_parquet(product_file_path) - - df_product_raw <- reading_product_data_step2(df_product_raw, synonyms_product) - - logDebug( - "df_product_raw dim: ", - dim(df_product_raw) %>% as.data.frame(), - "." - ) - - export_data_as_parquet( - data = df_product_raw, - filename = stringr::str_replace(product_file_name, "_product_raw", ""), - output_root = output_root, - suffix = "_product_cleaned" - ) - - logInfo("Finish process_product_file.") -} - -main() - -clearLoggers() +future::plan(future::multisession) + +paths <- a4d::init_paths(c("patient_data_cleaned", "product_data_cleaned"), delete = TRUE) +a4d::setup_logger(paths$output_root, "script2") +patient_data_files <- a4d::get_files(paths$tracker_root, pattern = "patient_raw.parquet$") +product_data_files <- a4d::get_files(paths$tracker_root, pattern = "product_raw.parquet$") +ParallelLogger::logInfo( + "Found ", + length(patient_data_files), + " patient csv files under ", + paths$tracker_root, + "." +) +ParallelLogger::logInfo( + "Found ", + length(product_data_files), + " product csv files under ", + paths$tracker_root, + "." 
+) + +ParallelLogger::logInfo("Start processing patient csv files.") + +progressr::with_progress({ + p <- progressr::progressor(steps = length(patient_data_files)) + + result <- furrr::future_map( + patient_data_files, + a4d::process_patient_file, + paths = paths, + p = p + ) +}) + +ParallelLogger::logInfo("Finish processing all patient csv files.") + +ParallelLogger::logDebug("Start processing product csv files.") + +progressr::with_progress({ + p <- progressr::progressor(steps = length(product_data_files)) + + result <- furrr::future_map( + product_data_files, + a4d::process_product_file, + paths = paths, + p = p + ) +}) + +ParallelLogger::logInfo("Finish processing all csv files.") + +ParallelLogger::clearLoggers() diff --git a/scripts/run_script_3_create_tables.R b/scripts/run_script_3_create_tables.R index 7962867..4cd6216 100644 --- a/scripts/run_script_3_create_tables.R +++ b/scripts/run_script_3_create_tables.R @@ -9,14 +9,14 @@ main <- function() { setup_logger(paths$output_root, "script3") patient_data_files <- get_files(file.path(paths$output_root, "patient_data_cleaned"), pattern = "\\.parquet$") product_data_files <- get_files(file.path(paths$output_root, "product_data_cleaned"), pattern = "\\.parquet$") - logInfo( + ParallelLogger::logInfo( "Found ", length(patient_data_files), " patient csv files under ", paths$tracker_root, "." ) - logInfo( + ParallelLogger::logInfo( "Found ", length(product_data_files), " product csv files under ", @@ -24,7 +24,7 @@ main <- function() { "." ) - logInfo("Start creating table csv files.") + ParallelLogger::logInfo("Start creating table csv files.") logfile <- "table_patient_data_static" with_file_logger(logfile, @@ -34,10 +34,10 @@ main <- function() { create_table_patient_data_static(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables) }, error = function(e) { - logError("Could not create table csv for static patient data. Error: ", e$message) + ParallelLogger::logError("Could not create table csv for static patient data. Error: ", e$message) }, warning = function(w) { - logWarn("Could not create table csv for static patient data. Error: ", w$message) + ParallelLogger::logWarn("Could not create table csv for static patient data. Error: ", w$message) } ) }, @@ -52,10 +52,10 @@ main <- function() { create_table_patient_data_monthly(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables) }, error = function(e) { - logError("Could not create table csv for monthly patient data. Error: ", e$message) + ParallelLogger::logError("Could not create table csv for monthly patient data. Error: ", e$message) }, warning = function(w) { - logWarn("Could not create table csv for monthly patient data. Error: ", w$message) + ParallelLogger::logWarn("Could not create table csv for monthly patient data. Error: ", w$message) } ) }, @@ -76,10 +76,10 @@ main <- function() { ) }, error = function(e) { - logError("Could not create table csv for longitudinal patient data. Error: ", e$message) + ParallelLogger::logError("Could not create table csv for longitudinal patient data. Error: ", e$message) }, warning = function(w) { - logWarn("Could not create table csv for longitudinal patient data. Error: ", w$message) + ParallelLogger::logWarn("Could not create table csv for longitudinal patient data. 
Error: ", w$message) } ) }, @@ -94,19 +94,19 @@ main <- function() { create_table_product_data(file.path(paths$output_root, "product_data_cleaned"), paths$tables) }, error = function(e) { - logError("Could not create table for product data. Error: ", e$message) + ParallelLogger::logError("Could not create table for product data. Error: ", e$message) }, warning = function(w) { - logWarn("Could not create table for product data. Warning: ", w$message) + ParallelLogger::logWarn("Could not create table for product data. Warning: ", w$message) } ) }, output_root = paths$output_root ) - logInfo("Finish creating table files.") + ParallelLogger::logInfo("Finish creating table files.") - logInfo("Trying to link files for product and patient data.") + ParallelLogger::logInfo("Trying to link files for product and patient data.") logfile <- "link_product_patient_data" @@ -120,17 +120,17 @@ main <- function() { ) }, error = function(e) { - logError("Could not link files for product and patient data. Error: ", e$message) + ParallelLogger::logError("Could not link files for product and patient data. Error: ", e$message) }, warning = function(w) { - logWarn("Could not link files for product and patient data. Warning: ", w$message) + ParallelLogger::logWarn("Could not link files for product and patient data. Warning: ", w$message) } ) }, output_root = paths$output_root ) - logInfo("Finished linking files for product and patient data.") + ParallelLogger::logInfo("Finished linking files for product and patient data.") } main() From 7e3b3f0fed472f761cfd00ecc4d48797b7aead99 Mon Sep 17 00:00:00 2001 From: Michael Aydinbas Date: Thu, 28 Dec 2023 21:27:53 +0100 Subject: [PATCH 3/8] working future code for script 1 and 2 --- DESCRIPTION | 4 +- NAMESPACE | 3 + R/create_table_patient_data.R | 4 +- R/create_table_patient_data_changes_only.R | 4 +- R/create_table_patient_data_static.R | 4 +- R/create_table_product_data.R | 42 +- R/helper_main.R | 18 +- R/helper_patient_data_fix.R | 38 +- R/helper_product_data.R | 12 +- R/helper_read_patient_data.R | 32 +- R/helper_scipt_1.R | 93 +++-- R/helper_script_2.R | 364 ++++++++++++++++++ R/link_product_patient.R | 12 +- R/logger.R | 19 +- R/read_cleaned_patient_data.R | 4 +- R/read_patient_data.R | 26 +- R/read_product_data.R | 60 +-- R/sysdata.rda | Bin 2763 -> 2763 bytes man/process_patient_file.Rd | 25 ++ man/process_product_file.Rd | 18 + man/process_tracker_file.Rd | 8 +- reference_data/build_package_data.R | 2 +- scripts/run_script_1_extract_raw_data.R | 18 +- scripts/run_script_2_clean_data.R | 422 +++------------------ scripts/run_script_3_create_tables.R | 32 +- 25 files changed, 678 insertions(+), 586 deletions(-) create mode 100644 R/helper_script_2.R create mode 100644 man/process_patient_file.Rd create mode 100644 man/process_product_file.Rd diff --git a/DESCRIPTION b/DESCRIPTION index a1d7052..c273a8f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,8 +8,7 @@ License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 -Depends: - ParallelLogger +Depends: Imports: data.table, digest, @@ -17,6 +16,7 @@ Imports: here, lubridate, openxlsx, + ParallelLogger, readr, readxl, stringr, diff --git a/NAMESPACE b/NAMESPACE index f8e779f..ca93149 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,7 +21,9 @@ export(harmonize_patient_data_columns) export(id_2_county_hospisal) export(init_paths) export(process_patient_data) +export(process_patient_file) export(process_product_data) +export(process_product_file) export(process_tracker_file) 
export(read_cleaned_patient_data) export(read_column_synonyms) @@ -29,6 +31,7 @@ export(report_empty_intersections) export(sanitize_str) export(select_A4D_directory) export(set_a4d_data_root) +export(setup_file_logger) export(setup_logger) export(with_file_logger) importFrom(data.table,"%like%") diff --git a/R/create_table_patient_data.R b/R/create_table_patient_data.R index 3f5c864..c6793ed 100644 --- a/R/create_table_patient_data.R +++ b/R/create_table_patient_data.R @@ -9,7 +9,7 @@ #' @param input_root root directory of the input CSV files. #' @param output_root root directory of the output folder. create_table_patient_data_monthly <- function(patient_data_files, input_root, output_root) { - logInfo("Start creating single csv for table patient_data_monthly.") + ParallelLogger::logInfo("Start creating single csv for table patient_data_monthly.") # THERE MIGHT BE MONTHLY COLUMNS MISSING - PLEASE ADD THEM dynamic_patient_columns <- @@ -58,5 +58,5 @@ create_table_patient_data_monthly <- function(patient_data_files, input_root, ou suffix = "" ) - logInfo("Finish creating single csv for table patient_data_monthly.") + ParallelLogger::logInfo("Finish creating single csv for table patient_data_monthly.") } diff --git a/R/create_table_patient_data_changes_only.R b/R/create_table_patient_data_changes_only.R index 6a06fe8..1a4178c 100644 --- a/R/create_table_patient_data_changes_only.R +++ b/R/create_table_patient_data_changes_only.R @@ -16,7 +16,7 @@ create_table_longitudinal_data <- output_root, variable, name) { - logInfo("Start creating single csv for table create_table_longitudinal_data and variable ", variable) + ParallelLogger::logInfo("Start creating single csv for table create_table_longitudinal_data and variable ", variable) dynamic_patient_columns <- c( @@ -79,5 +79,5 @@ create_table_longitudinal_data <- suffix = "" ) - logInfo("Finish creating single csv for table create_table_longitudinal_data and variable ", variable) + ParallelLogger::logInfo("Finish creating single csv for table create_table_longitudinal_data and variable ", variable) } diff --git a/R/create_table_patient_data_static.R b/R/create_table_patient_data_static.R index aaf180d..55c8c1d 100644 --- a/R/create_table_patient_data_static.R +++ b/R/create_table_patient_data_static.R @@ -9,7 +9,7 @@ #' @param input_root root directory of the input CSV files. #' @param output_root root directory of the output folder. 
 create_table_patient_data_static <- function(patient_data_files, input_root, output_root) {
-    logInfo("Start creating single csv for table patient_data_static.")
+    ParallelLogger::logInfo("Start creating single csv for table patient_data_static.")
 
     # THERE MIGHT BE STATIC COLUMNS MISSING - PLEASE ADD THEM
     static_patient_columns <-
@@ -59,5 +59,5 @@ create_table_patient_data_static <- function(patient_data_files, input_root, out
         suffix = "_static"
     )
 
-    logInfo("Finish creating single csv for table patient_data_static.")
+    ParallelLogger::logInfo("Finish creating single csv for table patient_data_static.")
 }
diff --git a/R/create_table_product_data.R b/R/create_table_product_data.R
index aaa6448..4ee7eaa 100644
--- a/R/create_table_product_data.R
+++ b/R/create_table_product_data.R
@@ -17,7 +17,7 @@
 #' create_table_product_data("path/to/input/directory", "path/to/output/directory")
 #' }
 create_table_product_data <- function(input_root, output_root) {
-    logInfo("Start creating single file for table product_data.")
+    ParallelLogger::logInfo("Start creating single file for table product_data.")
 
     # Get a list of all CSV files in the input_root directory
     files <- list.files(input_root, pattern = "*.parquet", full.names = TRUE)
@@ -25,7 +25,7 @@ create_table_product_data <- function(input_root, output_root) {
     # Read all CSV files and store them in a list
     data_list <- lapply(files, function(x) arrow::read_parquet(x))
 
-    logInfo(length(data_list), " files will be processed for creating the single file for table product_data.")
+    ParallelLogger::logInfo(length(data_list), " files will be processed for creating the single file for table product_data.")
 
     # Get the union of all column names
     all_names <- unique(unlist(lapply(data_list, colnames)))
@@ -39,25 +39,25 @@ create_table_product_data <- function(input_root, output_root) {
     # Merge all data frames
     merged_data <- do.call(rbind, data_list)
 
-    logDebug("Copying original parient IDs...")
+    ParallelLogger::logDebug("Copying original patient IDs...")
     merged_data$orig_product_released_to <- merged_data$product_released_to
-    logDebug("Trying to fix patient IDs...")
+    ParallelLogger::logDebug("Trying to fix patient IDs...")
     merged_data$product_released_to <- sapply(merged_data$product_released_to, fix_id)
-    logDebug("Extracting product_county and product_hospisal from patients IDs...")
+    ParallelLogger::logDebug("Extracting product_country and product_hospital from patient IDs...")
     merged_data <- id_2_county_hospisal(
         merged_data,
         "product_released_to",
         "product_country", "product_hospital"
     )
-    logDebug("Calculating the most frequent 'product_hospital' for each 'file_name'...")
+    ParallelLogger::logDebug("Calculating the most frequent 'product_hospital' for each 'file_name'...")
     tryCatch(
         {
             merged_data <- calculate_most_frequent(merged_data, "file_name", "product_hospital", "table_hospital")
         },
         error = function(e) {
-            logError("Error in calculating the most frequent 'product_hospital': ", e)
+            ParallelLogger::logError("Error in calculating the most frequent 'product_hospital': ", e)
         }
     )
 
@@ -67,14 +67,14 @@
             merged_data <- calculate_most_frequent(merged_data, "file_name", "product_country", "table_country")
         },
         error = function(e) {
-            logError("Error in calculating the most frequent 'product_country': ", e)
+            ParallelLogger::logError("Error in calculating the most frequent 'product_country': ", e)
         }
     )
 
     # Reorder, add, and ensures the correct data type for each column according to the list of fields
merged_data <- preparing_product_fields(merged_data) - logDebug("Checking 'table_country' for each 'file_name'...") + ParallelLogger::logDebug("Checking 'table_country' for each 'file_name'...") report_empty_intersections(merged_data, "file_name", "table_country") # Write the merged and processed data to a file in the output_root directory @@ -85,7 +85,7 @@ create_table_product_data <- function(input_root, output_root) { suffix = "" ) - logInfo("Finish creating single file for table product_data.") + ParallelLogger::logInfo("Finish creating single file for table product_data.") } @@ -168,7 +168,7 @@ preparing_product_fields <- function(merged_data) { "table_hospital" = "character" ) - logInfo("Start processing fields for the single csv product_data...") + ParallelLogger::logInfo("Start processing fields for the single csv product_data...") # Check if all fields are present in merged_data missing_fields <- setdiff(names(fields), names(merged_data)) @@ -186,7 +186,7 @@ preparing_product_fields <- function(merged_data) { merged_data[[field]] <- suppressWarnings(as.Date(original_values)) incorrect_rows <- which(is.na(merged_data[[field]]) & !is.na(original_values)) if (length(incorrect_rows) > 0) { - logWarn(paste( + ParallelLogger::logWarn(paste( "In", field, "incorrect date values were replaced with", ERROR_VAL_DATE, "in", length(incorrect_rows), "rows:", paste(incorrect_rows, collapse = ", ") @@ -198,7 +198,7 @@ preparing_product_fields <- function(merged_data) { merged_data[[field]] <- suppressWarnings(as.numeric(original_values)) incorrect_rows <- which(is.na(merged_data[[field]]) & !is.na(original_values)) if (length(incorrect_rows) > 0) { - logWarn(paste( + ParallelLogger::logWarn(paste( "In", field, "incorrect numeric values were replaced with", ERROR_VAL_NUMERIC, "in", length(incorrect_rows), "rows:", paste(incorrect_rows, collapse = ", ") @@ -210,7 +210,7 @@ preparing_product_fields <- function(merged_data) { merged_data[[field]] <- suppressWarnings(as.integer(original_values)) incorrect_rows <- which(is.na(merged_data[[field]]) & !is.na(original_values)) if (length(incorrect_rows) > 0) { - logWarn(paste( + ParallelLogger::logWarn(paste( "In", field, "incorrect integer values were replaced with", ERROR_VAL_NUMERIC, "in", length(incorrect_rows), "rows:", paste(incorrect_rows, collapse = ", ") @@ -222,7 +222,7 @@ preparing_product_fields <- function(merged_data) { merged_data[[field]] <- as.character(original_values) incorrect_rows <- which(is.na(merged_data[[field]]) & !is.na(original_values)) if (length(incorrect_rows) > 0) { - logWarn(paste( + ParallelLogger::logWarn(paste( "In", field, "incorrect character values were replaced with", ERROR_VAL_CHARACTER, "in", length(incorrect_rows), "rows:", paste(incorrect_rows, collapse = ", ") @@ -231,19 +231,19 @@ preparing_product_fields <- function(merged_data) { } } }, warning = function(w) { - logError(paste("Warning in converting", field, ": ", w)) + ParallelLogger::logError(paste("Warning in converting", field, ": ", w)) }, error = function(e) { - logWarn(paste("Error in converting", field, ": ", e)) + ParallelLogger::logWarn(paste("Error in converting", field, ": ", e)) }, finally = { - logDebug(paste("Finished converting", field)) + ParallelLogger::logDebug(paste("Finished converting", field)) }) } # Reorder the columns according to the list of fields - logInfo("Reorder the columns according to the list of fields...") + ParallelLogger::logInfo("Reorder the columns according to the list of fields...") merged_data <- merged_data[, 
c(names(fields), setdiff(names(merged_data), names(fields)))] - logInfo("Finished processing fields for the single csv product_data.") + ParallelLogger::logInfo("Finished processing fields for the single csv product_data.") return(merged_data) } @@ -313,7 +313,7 @@ report_empty_intersections <- function(df, row_category, col_category) { df_row_sums <- df_row_sums[df_row_sums$sum == 0, ] if (nrow(df_row_sums) > 0) { - logWarn( + ParallelLogger::logWarn( "The number of ", row_category, " with empty ", col_category, " is ", nrow(df_row_sums), ": ", paste(df_row_sums$row_name, sep = "", collapse = ", ") diff --git a/R/helper_main.R b/R/helper_main.R index ca8d1a3..b5cd0a1 100644 --- a/R/helper_main.R +++ b/R/helper_main.R @@ -139,7 +139,7 @@ read_column_synonyms <- function(synonym_file, path_prefixes = c("reference_data #' ) #' } export_data <- function(data, filename, output_root, suffix) { - logDebug("Start export_data. Suffix = ", suffix, ".") + ParallelLogger::logDebug("Start export_data. Suffix = ", suffix, ".") data %>% write.csv( file = @@ -156,7 +156,7 @@ export_data <- function(data, filename, output_root, suffix) { fileEncoding = "UTF-16LE", quote = T ) - logInfo("Finish export_data. Suffix = ", suffix, ".") + ParallelLogger::logInfo("Finish export_data. Suffix = ", suffix, ".") } @@ -177,12 +177,12 @@ export_data <- function(data, filename, output_root, suffix) { #' ) #' } export_data_as_parquet <- function(data, filename, output_root, suffix) { - logDebug("Start export_data. Suffix = ", suffix, ".") + ParallelLogger::logDebug("Start export_data_as_parquet. Suffix = ", suffix, ".") data %>% arrow::write_parquet( sink = file.path(output_root, paste0(filename, suffix, ".parquet")), ) - logInfo("Finish export_data. Suffix = ", suffix, ".") + ParallelLogger::logInfo("Finish export_data_as_parquet. Suffix = ", suffix, ".") } @@ -193,7 +193,7 @@ export_data_as_parquet <- function(data, filename, output_root, suffix) { #' #' @return tibble with patient data read_raw_csv <- function(file) { - logDebug("Start reading data with read_csv.") + ParallelLogger::logDebug("Start reading data with read_csv.") df_patient_raw <- readr::read_csv( file, name_repair = "check_unique", @@ -202,9 +202,9 @@ read_raw_csv <- function(file) { col_types = readr::cols(.default = "c"), locale = readr::locale(encoding = "UTF-16LE") ) - logDebug("Finished loading data with read_csv.") - logInfo("Dim: ", dim(df_patient_raw)) - logInfo("Columns: ", spec(df_patient_raw)) + ParallelLogger::logDebug("Finished loading data with read_csv.") + ParallelLogger::logInfo("Dim: ", dim(df_patient_raw)) + ParallelLogger::logInfo("Columns: ", readr::spec(df_patient_raw)) df_patient_raw } @@ -218,6 +218,6 @@ read_raw_csv <- function(file) { #' @return A named character vector with all allowed provinces.
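# The hunk just below swaps magrittr's %>% for the base R native pipe |>
# (available since R 4.1), so this helper no longer needs magrittr attached.
# A minimal sketch of the equivalence, with a toy list in place of the
# provinces YAML:
x <- list(KH = c("Phnom Penh", "Siem Reap"), LA = "Vientiane")
x |> unlist() # named character vector, same result as unlist(x)
identical(x |> unlist(), unlist(x)) # TRUE: |> is plain function application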
get_allowed_provinces <- function() { ## Should new countries and provinces be added, update the YAML file - provinces <- yaml::read_yaml("reference_data/provinces/allowed_provinces.yaml") %>% unlist() + provinces <- yaml::read_yaml("reference_data/provinces/allowed_provinces.yaml") |> unlist() return(provinces) } diff --git a/R/helper_patient_data_fix.R b/R/helper_patient_data_fix.R index 05bb168..5b10d72 100644 --- a/R/helper_patient_data_fix.R +++ b/R/helper_patient_data_fix.R @@ -24,11 +24,11 @@ convert_to <- function(x, cast_fnc, error_val, col_name = "", id = "") { x <- tryCatch( cast_fnc(x), error = function(e) { - logError("Could not convert value ", x, " in column ", col_name, " for patient: ", id) + ParallelLogger::logError("Could not convert value ", x, " in column ", col_name, " for patient: ", id) x <- error_val }, warning = function(w) { - logWarn("Could not convert value ", x, " in column ", col_name, " for patient: ", id) + ParallelLogger::logWarn("Could not convert value ", x, " in column ", col_name, " for patient: ", id) x <- error_val } ) @@ -55,7 +55,7 @@ cut_numeric_value <- function(x, } if (x < min || x > max) { - logWarn( + ParallelLogger::logWarn( "Found invalid value ", x, " for column ", col_name, " outside [", min, ", ", max, "]. ", "Value was replaced with ", ERROR_VAL_NUMERIC, "." ) @@ -154,7 +154,7 @@ parse_dates <- function(date) { parsed_date <- suppressWarnings(lubridate::as_date(date)) if (is.na(parsed_date)) { - logWarn( + ParallelLogger::logWarn( "Could not parse date value ", date, ". ", "Trying to parse with lubridate::parse_date_time and orders = c('dmy', 'dmY', 'by', 'bY')." ) @@ -195,9 +195,9 @@ check_allowed_values <- function(x, valid_values, id, replace_invalid = TRUE, er valid_value_mapping <- setNames(as.list(valid_values), sanitize_str(valid_values)) if (!sanitize_str(x) %in% names(valid_value_mapping)) { - logWarn("Patient ", id, ": Value ", x, " for column ", col, " is not in the list of allowed values. ") + ParallelLogger::logWarn("Patient ", id, ": Value ", x, " for column ", col, " is not in the list of allowed values. ") if (replace_invalid) { - logInfo("Replacing ", x, " with ", error_val, ".") + ParallelLogger::logInfo("Replacing ", x, " with ", error_val, ".") return(error_val) } else { return(x) @@ -275,13 +275,13 @@ fix_age <- function(age, dob, tracker_year, tracker_month, id) { } if (is.na(age)) { - logWarn( + ParallelLogger::logWarn( "Patient ", id, ": age is missing. Using calculated age ", calc_age, " instead of original age." ) } else { if (calc_age != age) { - logWarn( + ParallelLogger::logWarn( "Patient ", id, ": age ", age, " is different from calculated age ", calc_age, ". Using calculated age instead of original age." ) @@ -289,7 +289,7 @@ fix_age <- function(age, dob, tracker_year, tracker_month, id) { } if (calc_age < 0) { - logWarn("Patient ", id, ": calculated age is negative. Something went wrong.") + ParallelLogger::logWarn("Patient ", id, ": calculated age is negative. 
Something went wrong.") calc_age <- ERROR_VAL_NUMERIC } } @@ -322,11 +322,11 @@ fix_bmi <- function(weight, height, id) { if (!is.na(weight) && weight == ERROR_VAL_CHARACTER) { - logWarn("Patient ", id, ": the weight is out of bounds.") + ParallelLogger::logWarn("Patient ", id, ": the weight is out of bounds.") } if (!is.na(height) && height == ERROR_VAL_CHARACTER) { - logWarn("Patient ", id, ": the height is out of bounds.") + ParallelLogger::logWarn("Patient ", id, ": the height is out of bounds.") } bmi } @@ -353,7 +353,7 @@ fix_sex <- function(sex, id) { ) if (!is.na(fixed_sex) && fixed_sex == ERROR_VAL_CHARACTER) { - logWarn("Patient ", id, ": sex ", sex, " is not in the list of synonyms. Replacing it with ", fixed_sex, ".") + ParallelLogger::logWarn("Patient ", id, ": sex ", sex, " is not in the list of synonyms. Replacing it with ", fixed_sex, ".") } fixed_sex } @@ -463,7 +463,7 @@ fix_testing_frequency <- function(test_frq) { } if (grepl("-", test_frq, fixed = TRUE)) { - logInfo("Found a range for testing_frequency. Replacing it with the mean.") + ParallelLogger::logInfo("Found a range for testing_frequency. Replacing it with the mean.") test_frq <- try(as.character(replace_range_with_mean(test_frq), silent = TRUE)) } @@ -492,7 +492,7 @@ replace_range_with_mean <- function(x) { #' #' @return data frame with two new columns: blood_pressure_sys_mmhg and blood_pressure_dias_mmhg. split_bp_in_sys_and_dias <- function(df) { - logInfo("Splitting blood_pressure_mmhg into blood_pressure_sys_mmhg and blood_pressure_dias_mmhg.") + ParallelLogger::logInfo("Splitting blood_pressure_mmhg into blood_pressure_sys_mmhg and blood_pressure_dias_mmhg.") df <- df %>% dplyr::mutate( blood_pressure_mmhg = dplyr::case_when( @@ -502,7 +502,7 @@ split_bp_in_sys_and_dias <- function(df) { ) if (paste(ERROR_VAL_NUMERIC, ERROR_VAL_NUMERIC, sep = "/") %in% df$blood_pressure_mmhg) { - logWarn( + ParallelLogger::logWarn( "Found invalid values for column blood_pressure_mmhg that do not follow the format X/Y. ", "Values were replaced with ", ERROR_VAL_NUMERIC, "." ) @@ -514,7 +514,7 @@ split_bp_in_sys_and_dias <- function(df) { delim = "/", names = c("blood_pressure_sys_mmhg", "blood_pressure_dias_mmhg"), ) - logDebug("Finished splitting blood_pressure_mmhg into blood_pressure_sys_mmhg and blood_pressure_dias_mmhg.") + ParallelLogger::logDebug("Finished splitting blood_pressure_mmhg into blood_pressure_sys_mmhg and blood_pressure_dias_mmhg.") df } @@ -550,12 +550,12 @@ fix_id <- function(id) { id <- stringr::str_replace(id, "-", "_") if (!grepl("^[[:upper:]]{2}_[[:upper:]]{2}[[:digit:]]{3}$", id)) { - logWarn("Patient ", id, ": id cannot be matched to a 7 letter alpha numeric code like XX_YY001. ") + ParallelLogger::logWarn("Patient ", id, ": id cannot be matched to a 7 letter alpha numeric code like XX_YY001. 
") if (stringr::str_length(id) > 8) { - logWarn("Patient ", id, ": id was truncated because it is longer than 8 characters.") + ParallelLogger::logWarn("Patient ", id, ": id was truncated because it is longer than 8 characters.") id <- stringr::str_sub(id, 1, 8) } else { - logError("Patient ", id, ": id is not valid.") + ParallelLogger::logError("Patient ", id, ": id is not valid.") id <- ERROR_VAL_CHARACTER } } diff --git a/R/helper_product_data.R b/R/helper_product_data.R index 6f04cd7..3e56f6b 100644 --- a/R/helper_product_data.R +++ b/R/helper_product_data.R @@ -83,7 +83,7 @@ get_patient_end <- function(df, j) { # @Description: Reads product data from a monthly file based on extraction logic extract_product_data <- function(monthly_tracker_df) { - logDebug("Starting extract_product_data.") + ParallelLogger::logDebug("Starting extract_product_data.") start_df_msd <- NULL end_df_msd <- NULL @@ -103,7 +103,7 @@ extract_product_data <- function(monthly_tracker_df) { # Clean empty remaining first row product_data_df <- set_second_row_as_headers_and_remove_first_row(product_data_df) - logDebug("Finish extract_product_data.") + ParallelLogger::logDebug("Finish extract_product_data.") return(product_data_df) } @@ -134,7 +134,7 @@ extract_product_data <- function(monthly_tracker_df) { # column synonyms to unify column names # @columns_synonyms: Long format output of read_column_synonyms to match columns harmonize_input_data_columns <- function(product_df, columns_synonyms) { - logDebug("Start harmonize_input_data_columns.") + ParallelLogger::logDebug("Start harmonize_input_data_columns.") # In case that there is additional data in strange columns, keep only relevant columns # keep.cols <- names(product_df) %in% c("") @@ -147,14 +147,14 @@ harmonize_input_data_columns <- function(product_df, columns_synonyms) { ## report all column names which have not been found unknown_column_names <- colnames(product_df)[!colnames(product_df) %in% synonym_headers] if (length(unknown_column_names) > 0) { - logWarn("Unknown column names in sheet: ", paste(unknown_column_names, collapse = ", ")) + ParallelLogger::logWarn("Unknown column names in sheet: ", paste(unknown_column_names, collapse = ", ")) } # replacing var codes colnames_found <- match(colnames(product_df), synonym_headers, nomatch = 0) colnames(product_df)[colnames(product_df) %in% synonym_headers] <- columns_synonyms$name_clean[colnames_found] - logDebug("Finish harmonize_input_data_columns.") + ParallelLogger::logDebug("Finish harmonize_input_data_columns.") if (sum(colnames_found == 0) != 0) { "Non-matching column names found (see 0)" # SK: remove non matching column names @@ -282,7 +282,7 @@ update_receivedfrom <- function(product_df) { grepl("Balance", product_units_received, ignore.case = TRUE) & !is.na(product_received_from) ~ product_received_from )) %>% dplyr::mutate(product_units_released = ifelse(!is.na(product_received_from), NA, product_units_released)) - logInfo("The rule for the case was applied successfully- Released (product_units_released) column also includes values for Start/End Balance") + ParallelLogger::logInfo("The rule for the case was applied successfully- Released (product_units_released) column also includes values for Start/End Balance") } return(product_df) } diff --git a/R/helper_read_patient_data.R b/R/helper_read_patient_data.R index cca5779..8f61b04 100644 --- a/R/helper_read_patient_data.R +++ b/R/helper_read_patient_data.R @@ -1,7 +1,7 @@ # extracting country and clinic code from patient ID # expects that patient 
ID has a certain format extract_country_clinic_code <- function(patient_data) { - logDebug("Start extract_country_clinic_code.") + ParallelLogger::logDebug("Start extract_country_clinic_code.") patient_ids <- patient_data["id"] %>% dplyr::filter(id != "0") %>% tidyr::drop_na() %>% @@ -19,9 +19,9 @@ extract_country_clinic_code <- function(patient_data) { clinic_code <- names(sort(table(patient_ids$clinic), decreasing = T))[1] - logDebug("country_code = ", country_code, ".") - logDebug("clinic_code = ", clinic_code, ".") - logDebug("Finish extract_country_clinic_code.") + ParallelLogger::logDebug("country_code = ", country_code, ".") + ParallelLogger::logDebug("clinic_code = ", clinic_code, ".") + ParallelLogger::logDebug("Finish extract_country_clinic_code.") return(list("country_code" = country_code, "clinic_code" = clinic_code)) } @@ -40,9 +40,9 @@ extract_country_clinic_code <- function(patient_data) { #' @return data.frame with the patient data #' @export extract_patient_data <- function(tracker_data_file, sheet, year) { - logDebug("Start extract_patient_data for sheet = ", sheet, ".") + ParallelLogger::logDebug("Start extract_patient_data for sheet = ", sheet, ".") - logDebug("Start openxlsx::read.xlsx to get tracker_data.") + ParallelLogger::logDebug("Start openxlsx::read.xlsx to get tracker_data.") tracker_data <- openxlsx::read.xlsx( xlsxFile = tracker_data_file, sheet = sheet, @@ -59,7 +59,7 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { # col_names = F, # .name_repair = "unique_quiet" # ) - logDebug("Finish openxlsx::read.xlsx.") + ParallelLogger::logDebug("Finish openxlsx::read.xlsx.") # Assumption: first column is always empty until patient data begins patient_data_range <- which(!is.na(tracker_data[, 1])) @@ -80,8 +80,8 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { row_max <- row_max + 1 } - logInfo("Patient data found in rows ", row_min, " to ", row_max, ".") - logDebug("Start readxl::read_excel to get patient data.") + ParallelLogger::logInfo("Patient data found in rows ", row_min, " to ", row_max, ".") + ParallelLogger::logDebug("Start readxl::read_excel to get patient data.") df_patient <- readxl::read_excel( path = tracker_data_file, sheet = sheet, @@ -91,11 +91,11 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { col_types = c("text"), .name_repair = "unique_quiet" ) - logDebug("Finish readxl::read_excel.") + ParallelLogger::logDebug("Finish readxl::read_excel.") if (header_cols[2] == header_cols_2[2]) { # take into account that date info gets separated from the updated values (not in the same row, usually in the bottom row) - logInfo("Read in multiline header.") + ParallelLogger::logInfo("Read in multiline header.") diff_colnames <- which((header_cols != header_cols_2)) header_cols[diff_colnames] <- @@ -106,7 +106,7 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { } colnames(df_patient) <- header_cols - logDebug("Found patient column names = ", paste(header_cols, collapse = ","), ".") + ParallelLogger::logDebug("Found patient column names = ", paste(header_cols, collapse = ","), ".") # delete columns without a header (=NA) df_patient <- df_patient[, !is.na(colnames(df_patient))] @@ -117,7 +117,7 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { df_patient <- df_patient[rowSums(is.na(df_patient)) != ncol(df_patient), ] - logDebug("Finish extract_patient_data.") + ParallelLogger::logDebug("Finish extract_patient_data.") df_patient } @@ -137,7 +137,7 @@ 
extract_patient_data <- function(tracker_data_file, sheet, year) { #' @export harmonize_patient_data_columns <- function(patient_df, columns_synonyms) { - logDebug("Start harmonize_patient_data_columns.") + ParallelLogger::logDebug("Start harmonize_patient_data_columns.") patient_df <- patient_df[!is.na(names(patient_df))] @@ -151,11 +151,11 @@ harmonize_patient_data_columns <- mismatching_column_ids <- which(colnames_found == 0) if (length(mismatching_column_ids) > 0) { - logWarn( + ParallelLogger::logWarn( "Non-matching column names found: ", paste(colnames(patient_df)[mismatching_column_ids], collapse = ","), "." ) } - logDebug("Finish harmonize_patient_data_columns.") + ParallelLogger::logDebug("Finish harmonize_patient_data_columns.") patient_df } diff --git a/R/helper_scipt_1.R b/R/helper_scipt_1.R index 222d5f1..f1cf102 100644 --- a/R/helper_scipt_1.R +++ b/R/helper_scipt_1.R @@ -1,5 +1,3 @@ - - #' @title Process a single tracker file and extract patient and product data. #' #' @param tracker_file Filename of the tracker. @@ -13,56 +11,57 @@ process_tracker_file <- function(tracker_file, paths, p) { synonyms <- get_synonyms() tracker_data_file <- file.path(paths$tracker_root, tracker_file) - logDebug("Start process_tracker_file.") - logInfo( + + ParallelLogger::logDebug("Start process_tracker_file.") + ParallelLogger::logInfo( "Current file: ", tracker_name ) logfile <- paste0(tracker_name, "_", "patient") with_file_logger(logfile, - { - tryCatch( - process_patient_data( - tracker_name = tracker_name, - tracker_data_file = tracker_data_file, - output_root = paths$patient_data_raw, - synonyms_patient = synonyms$patient - ), - error = function(e) { - logError("Could not process patient data. Error = ", e$message, ".") - }, - warning = function(w) { - logWarn("Could not process patient data. Warning = ", w$message, ".") - } - ) - }, - output_root = paths$output_root + { + tryCatch( + process_patient_data( + tracker_name = tracker_name, + tracker_data_file = tracker_data_file, + output_root = paths$patient_data_raw, + synonyms_patient = synonyms$patient + ), + error = function(e) { + ParallelLogger::logError("Could not process patient data. Error = ", e$message, ".") + }, + warning = function(w) { + ParallelLogger::logWarn("Could not process patient data. Warning = ", w$message, ".") + } + ) + }, + output_root = paths$output_root ) logfile <- paste0(tracker_name, "_", "product") with_file_logger(logfile, - { - tryCatch( - process_product_data( - tracker_name = tracker_name, - tracker_data_file = tracker_data_file, - output_root = paths$product_data_raw, - synonyms_product = synonyms$product - ), - error = function(e) { - logError("Could not process product data. Error = ", e$message, ".") - }, - warning = function(w) { - logWarn("Could not process product data. Warning = ", w$message, ".") - } - ) - }, - output_root = paths$output_root + { + tryCatch( + process_product_data( + tracker_name = tracker_name, + tracker_data_file = tracker_data_file, + output_root = paths$product_data_raw, + synonyms_product = synonyms$product + ), + error = function(e) { + ParallelLogger::logError("Could not process product data. Error = ", e$message, ".") + }, + warning = function(w) { + ParallelLogger::logWarn("Could not process product data. 
Warning = ", w$message, ".") + } + ) + }, + output_root = paths$output_root ) - logInfo("Finish process_tracker_file.") + ParallelLogger::logDebug("Finish process_tracker_file.") } @@ -79,7 +78,7 @@ process_patient_data <- tracker_data_file, output_root, synonyms_patient) { - logDebug("Start process_patient_data.") + ParallelLogger::logDebug("Start process_patient_data.") df_raw_patient <- reading_patient_data( @@ -89,7 +88,7 @@ process_patient_data <- df_raw_patient <- df_raw_patient %>% dplyr::mutate(file_name = tracker_name) - logDebug( + ParallelLogger::logDebug( "df_raw_patient dim: ", dim(df_raw_patient) %>% as.data.frame(), "." @@ -102,7 +101,7 @@ process_patient_data <- suffix = "_patient_raw" ) - logInfo("Finish process_patient_data.") + ParallelLogger::logDebug("Finish process_patient_data.") } @@ -119,7 +118,7 @@ process_product_data <- tracker_data_file, output_root, synonyms_product) { - logDebug("Start process_product_data.") + ParallelLogger::logDebug("Start process_product_data.") df_raw_product <- reading_product_data_step1( @@ -130,10 +129,10 @@ process_product_data <- if (!is.null(df_raw_product)) { df_raw_product <- df_raw_product %>% dplyr::mutate(file_name = tracker_name) } else { - logDebug("Empty product data") + ParallelLogger::logDebug("Empty product data") } - logDebug( + ParallelLogger::logDebug( "df_raw_product dim: ", dim(df_raw_product) %>% as.data.frame(), "." @@ -148,7 +147,7 @@ process_product_data <- suffix = "_product_raw" ) } else { - logWarn("No product data in the file") + ParallelLogger::logWarn("No product data in the file") } - logDebug("Finish process_product_data.") + ParallelLogger::logDebug("Finish process_product_data.") } diff --git a/R/helper_script_2.R b/R/helper_script_2.R new file mode 100644 index 0000000..035b5db --- /dev/null +++ b/R/helper_script_2.R @@ -0,0 +1,364 @@ +#' Process and clean patient raw data +#' +#' @description +#' Read in the output of the first script as parquet (all character). +#' Merge extracted patient data with the metadata schema that contains all +#' necessary columns. Process each variable in three steps: +#' 1. Fix any problems with character representation. +#' 2. Convert column to target data type, for example a date or number. +#' 3. Apply any post-processing logic for the target data type. +#' +#' +#' @param patient_file Path to patient raw parquet file from first script. +#' @param paths List with paths to patient_data_cleaned and product_data_cleaned folders. +#' @param p progressor from progressr package. +#' +#' @export +process_patient_file <- function(patient_file, paths, p) { + p() + + ERROR_VAL_NUMERIC <<- 999999 + ERROR_VAL_CHARACTER <<- "Undefined" + ERROR_VAL_DATE <<- "9999-09-09" + + patient_file_name <- tools::file_path_sans_ext(basename(patient_file)) + patient_file_path <- + file.path(paths$tracker_root, patient_file) + output_root <- paths$patient_data_cleaned + + ParallelLogger::logDebug("Start process_patient_file.") + ParallelLogger::logInfo( + "Current file: ", + patient_file_name + ) + + logfile <- paste0(patient_file_name) + + with_file_logger(logfile, + { + tryCatch( + process_patient_file_worker( + patient_file_path = patient_file_path, + patient_file_name = patient_file_name, + output_root = output_root + ), + error = function(e) { + ParallelLogger::logError("Could not process raw patient data. Error = ", e$message, ".") + }, + warning = function(w) { + ParallelLogger::logWarn("Could not process raw patient data. 
Warning = ", w$message, ".") + } + ) + }, + output_root = paths$output_root + ) + + ParallelLogger::logInfo("Finish process_patient_file.") +} + + +process_patient_file_worker <- function(patient_file_path, patient_file_name, output_root) { + allowed_provinces <- get_allowed_provinces() + + df_patient_raw <- arrow::read_parquet(patient_file_path) + + # filter all rows with no patient id or patient name + df_patient_raw <- df_patient_raw %>% + dplyr::filter(!(is.na(id) & is.na(name))) %>% + dplyr::filter(!(id == "0" & name == "0")) + + # --- TRANSFORMATIONS --- + # data before 2019 had only one column for updated hba1c and fbg + # with date as part of the value + if (!"hba1c_updated_date" %in% colnames(df_patient_raw) && "hba1c_updated" %in% colnames(df_patient_raw)) { + ParallelLogger::logInfo("Column updated_hba1c_date not found. Trying to parse from hba1c_updated.") + df_patient_raw <- + extract_date_from_measurement(df_patient_raw, "hba1c_updated") + ParallelLogger::logDebug("Finished parsing dates from hba1c_updated.") + } + + if (!"fbg_updated_date" %in% colnames(df_patient_raw) && "fbg_updated_mg" %in% colnames(df_patient_raw)) { + ParallelLogger::logInfo("Column updated_fbg_date not found. Trying to parse from fbg_updated_mg.") + df_patient_raw <- + extract_date_from_measurement(df_patient_raw, "fbg_updated_mg") + ParallelLogger::logDebug("Finished parsing dates from fbg_updated_mg.") + } + + if (!"fbg_updated_date" %in% colnames(df_patient_raw) && "fbg_updated_mmol" %in% colnames(df_patient_raw)) { + ParallelLogger::logInfo("Column fbg_updated_date not found. Trying to parse from fbg_updated_mmol.") + df_patient_raw <- + extract_date_from_measurement(df_patient_raw, "fbg_updated_mmol") + ParallelLogger::logDebug("Finished parsing dates from fbg_updated_mmol.") + } + + # blood pressure is given as sys/dias value pair, + # so we split this column in two separate columns + if ("blood_pressure_mmhg" %in% colnames(df_patient_raw)) { + df_patient_raw <- split_bp_in_sys_and_dias(df_patient_raw) + } + + # The maximum value available for hba1c will be around 14% - 18%, + # depending on the equipment being used. + # If the reading is above the maximum available value the > sign is used - + # we would prefer to retain this character in the database as it is important for data analysis. 
+ ParallelLogger::logInfo("Adding columns hba1c_baseline_exceeds and hba1c_updated_exceeds.") + df_patient_raw <- df_patient_raw %>% + dplyr::mutate( + hba1c_baseline_exceeds = ifelse(grepl(">|<", hba1c_baseline), TRUE, FALSE), + hba1c_updated_exceeds = ifelse(grepl(">|<", hba1c_updated), TRUE, FALSE) + ) + + # --- META SCHEMA --- + # meta schema has all final columns for the database + # along with their corresponding data types + ParallelLogger::logInfo("Creating meta schema.") + # short type string for read_csv: + # iiinDccDcnnDnncnlnlDncDccDDDccccDccccciDciiiDn + schema <- tibble::tibble( + # clinic_visit = logical(), + # complication_screening = character(), + # complication_screening_date = lubridate::as_date(1), + # complication_screening_results = character(), + # dm_complication_comment = character(), # TODO + # dm_complication_eye = character(), # TODO + # dm_complication_kidney = character(), # TODO + # dm_complication_other = character(), # TODO + # est_strips_pmonth = integer(), + # family_support_scale = character(), # TODO + # inactive_reason = character(), + # insulin_dosage = character(), + # meter_received_date = lubridate::as_date(1), # TODO + # remarks = character(), + # remote_followup = logical(), + # additional_support = character(), + age = integer(), + blood_pressure_dias_mmhg = integer(), + blood_pressure_sys_mmhg = integer(), + bmi = numeric(), + bmi_date = lubridate::as_date(1), + clinic_code = character(), + country_code = character(), + dob = lubridate::as_date(1), + edu_occ = character(), + fbg_baseline_mg = numeric(), + fbg_baseline_mmol = numeric(), + fbg_updated_date = lubridate::as_date(1), + fbg_updated_mg = numeric(), + fbg_updated_mmol = numeric(), + file_name = character(), + hba1c_baseline = numeric(), + hba1c_baseline_exceeds = logical(), + hba1c_updated = numeric(), + hba1c_updated_exceeds = logical(), + hba1c_updated_date = lubridate::as_date(1), + height = numeric(), + hospitalisation_cause = character(), + hospitalisation_date = lubridate::as_date(1), + id = character(), + insulin_regimen = character(), + last_clinic_visit_date = lubridate::as_date(1), + last_remote_followup_date = lubridate::as_date(1), + lost_date = lubridate::as_date(1), + name = character(), + observations = character(), + observations_category = character(), + province = character(), + recruitment_date = lubridate::as_date(1), + sex = character(), + sheet_name = character(), + status = character(), + status_out = character(), + support_from_a4d = character(), + t1d_diagnosis_age = integer(), + t1d_diagnosis_date = lubridate::as_date(1), + t1d_diagnosis_with_dka = character(), + testing_frequency = integer(), + tracker_date = lubridate::as_date(1), + tracker_month = integer(), + tracker_year = integer(), + updated_2022_date = lubridate::as_date(1), + weight = numeric() + ) + + cols_extra <- colnames(df_patient_raw)[!colnames(df_patient_raw) %in% colnames(schema)] + ParallelLogger::logWarn("Extra columns in patient data: ", paste(cols_extra, collapse = ", ")) + + cols_missing <- + colnames(schema)[!colnames(schema) %in% colnames(df_patient_raw)] + ParallelLogger::logWarn("Missing columns in patient data: ", paste(cols_missing, collapse = ", ")) + + # add all columns of schema to df_patient_raw + # keep all rows, only append missing cols + ParallelLogger::logInfo("Merging df_patient with meta schema and selecting all columns of meta schema.") + df_patient <- merge.default(df_patient_raw, schema, all.x = T) + df_patient <- df_patient[colnames(schema)] + + # the cleaning, fixing 
and validating happens in three major steps: + # 1. make sure we fix any known problems in the raw character columns + df_patient <- + df_patient %>% + dplyr::rowwise() %>% + # 1. handle known problems before converting to target type + dplyr::mutate( + t1d_diagnosis_age = fix_t1d_diagnosis_age(t1d_diagnosis_age, id), + hba1c_baseline = stringr::str_replace(hba1c_baseline, "<|>", ""), + hba1c_updated = stringr::str_replace(hba1c_updated, "<|>", ""), + fbg_baseline_mg = fix_fbg(fbg_baseline_mg), + fbg_baseline_mmol = fix_fbg(fbg_baseline_mmol), + fbg_updated_mg = fix_fbg(fbg_updated_mg), + fbg_updated_mmol = fix_fbg(fbg_updated_mmol), + testing_frequency = fix_testing_frequency(testing_frequency) + ) %>% + # 2. convert the refined character columns into the target data type + dplyr::mutate( + dplyr::across( + schema %>% dplyr::select(tidyselect::where(is.numeric)) %>% names(), + \(x) convert_to(correct_decimal_sign(x), as.numeric, ERROR_VAL_NUMERIC, dplyr::cur_column(), id = id) + ), + dplyr::across( + schema %>% dplyr::select(tidyselect::where(is.logical)) %>% names(), + \(x) convert_to(x, as.logical, FALSE, dplyr::cur_column(), id = id) + ), + dplyr::across( + schema %>% dplyr::select(tidyselect::where(lubridate::is.Date)) %>% names(), + \(x) convert_to(fix_digit_date(x), parse_dates, as.Date(ERROR_VAL_DATE), dplyr::cur_column(), id = id) + ), + dplyr::across( + schema %>% dplyr::select(tidyselect::where(is.integer)) %>% names(), + \(x) convert_to(x, function(x) as.integer(round(as.double(x))), ERROR_VAL_NUMERIC, dplyr::cur_column(), id = id) + ) + ) %>% + # 3. fix remaining problems in the target data type + dplyr::mutate( + # height and weight are needed to calculate bmi + height = transform_cm_to_m(height) %>% + cut_numeric_value(min = 0, max = 2.3, col_name = "height"), + weight = cut_numeric_value(weight, min = 0, max = 200, col_name = "weight"), + bmi = fix_bmi(weight, height, id) %>% + cut_numeric_value(min = 4, max = 60, "bmi"), + age = fix_age(age, dob, tracker_year, tracker_month, id) %>% + cut_numeric_value(min = 0, max = 25, "age"), + sex = fix_sex(sex, id), + hba1c_baseline = cut_numeric_value(hba1c_baseline, min = 4, max = 18, "hba1c_baseline"), + # https://www.cleveland19.com/story/1425584/ohio-man-holds-world-record-of-highest-blood-sugar/ + fbg_baseline_mmol = cut_numeric_value(fbg_baseline_mmol, min = 0, max = 136.5, "fbg_baseline_mmol"), + # https://www.cleveland19.com/story/1425584/ohio-man-holds-world-record-of-highest-blood-sugar/ + fbg_updated_mmol = cut_numeric_value(fbg_updated_mmol, min = 0, max = 136.5, "fbg_updated_mmol"), + blood_pressure_sys_mmhg = cut_numeric_value(blood_pressure_sys_mmhg, min = 20, max = 250, "blood_pressure_sys_mmhg"), + blood_pressure_dias_mmhg = cut_numeric_value(blood_pressure_dias_mmhg, min = 20, max = 220, "blood_pressure_dias_mmhg"), + tracker_date = lubridate::ym(paste(tracker_year, tracker_month, sep = "-")), + !!!parse_character_cleaning_config(a4d:::config$cleaning), + # should be fixed last as other fix functions use id to log invalid rows! 
+ id = fix_id(id) + ) %>% + dplyr::ungroup() + + # add clinic and country code after having fixed all issues with patient id + cc_codes <- extract_country_clinic_code(df_patient) + df_patient["clinic_code"] <- cc_codes$clinic_code + df_patient["country_code"] <- cc_codes$country_code + + # Formula to calculate mmol/l from mg/dl: mmol/l = mg/dl / 18 + if (all(is.na(df_patient$fbg_baseline_mmol))) { + df_patient <- df_patient %>% + dplyr::mutate(fbg_baseline_mmol = dplyr::case_when( + fbg_baseline_mg != ERROR_VAL_NUMERIC ~ fbg_baseline_mg / 18 + )) + } + if (all(is.na(df_patient$fbg_updated_mmol))) { + df_patient <- df_patient %>% + dplyr::mutate(fbg_updated_mmol = dplyr::case_when( + fbg_updated_mg != ERROR_VAL_NUMERIC ~ fbg_updated_mg / 18 + )) + } + + # Formula to calculate mg/dl from mmol/l: mg/dl = 18 × mmol/l + if (all(is.na(df_patient$fbg_baseline_mg))) { + df_patient <- df_patient %>% + dplyr::mutate(fbg_baseline_mg = dplyr::case_when( + fbg_baseline_mmol != ERROR_VAL_NUMERIC ~ fbg_baseline_mmol * 18 + )) + } + if (all(is.na(df_patient$fbg_updated_mg))) { + df_patient <- df_patient %>% + dplyr::mutate(fbg_updated_mg = dplyr::case_when( + fbg_updated_mmol != ERROR_VAL_NUMERIC ~ fbg_updated_mmol * 18 + )) + } + + # sort by year and month like it is in the tracker files + df_patient <- df_patient %>% + dplyr::arrange(tracker_date, id) + + ParallelLogger::logDebug( + "df_patient dim: ", + dim(df_patient) %>% as.data.frame(), + "." + ) + + export_data_as_parquet( + data = df_patient, + filename = stringr::str_replace(patient_file_name, "_patient_raw", ""), + output_root = output_root, + suffix = "_patient_cleaned" + ) +} + + +#' Process and clean product raw data +#' +#' @description +#' Read the raw product parquet file produced by the first script, clean it with reading_product_data_step2, and export the result as parquet. +#' +#' +#' @param product_file Path to product raw parquet file from first script. +#' @param paths List with paths to patient_data_cleaned and product_data_cleaned folders. +#' @param p progressor from progressr package. +#' +#' @export +process_product_file <- function(product_file, paths, p) { + p() + synonyms <- get_synonyms() + synonyms_product <- synonyms$product + product_file_name <- tools::file_path_sans_ext(basename(product_file)) + product_file_path <- + file.path(paths$tracker_root, product_file) + + ParallelLogger::logDebug("Start process_product_file.") + ParallelLogger::logInfo( + "Current file: ", + product_file_name + ) + df_product_raw <- arrow::read_parquet(product_file_path) + + logfile <- paste0(product_file_name) + with_file_logger(logfile, + { + tryCatch( + df_product_raw <- reading_product_data_step2(df_product_raw, synonyms_product), + error = function(e) { + ParallelLogger::logError("Could not process raw product data. Error = ", e$message, ".") + }, + warning = function(w) { + ParallelLogger::logWarn("Could not process raw product data. Warning = ", w$message, ".") + } + ) + + ParallelLogger::logDebug( + "df_product_raw dim: ", + dim(df_product_raw) %>% as.data.frame(), + "." 
+ ) + + export_data_as_parquet( + data = df_product_raw, + filename = stringr::str_replace(product_file_name, "_product_raw", ""), + output_root = paths$product_data_cleaned, + suffix = "_product_cleaned" + ) + }, + output_root = paths$output_root + ) + + ParallelLogger::logInfo("Finish process_product_file.") +} diff --git a/R/link_product_patient.R b/R/link_product_patient.R index 119de01..8816d57 100644 --- a/R/link_product_patient.R +++ b/R/link_product_patient.R @@ -14,7 +14,7 @@ #' link_product_patient("path/to/product_data.parquet", "path/to/patient_data.parquet") #' } link_product_patient <- function(product_file, patient_file) { - logInfo("Trying to link product file ", product_file, " with patient file ", patient_file) + ParallelLogger::logInfo("Trying to link product file ", product_file, " with patient file ", patient_file) patient_data <- arrow::read_parquet(patient_file) product_data <- arrow::read_parquet(product_file) @@ -44,7 +44,7 @@ link_product_patient <- function(product_file, patient_file) { tryCatch( { if (nrow(summary_df) > 0) { - logWarn( + ParallelLogger::logWarn( "The number of mismatched patient IDs between the product and patient data is ", nrow(summary_df), ". ", paste("File Name: ", summary_df$file_name, @@ -56,18 +56,18 @@ link_product_patient <- function(product_file, patient_file) { } }, error = function(e) { - logError("Could not link csv files for product and patient data. Error: ", e$message) + ParallelLogger::logError("Could not link csv files for product and patient data. Error: ", e$message) }, warning = function(w) { - logWarn("Could not link csv files for product and patient data. Warning: ", w$message) + ParallelLogger::logWarn("Could not link csv files for product and patient data. Warning: ", w$message) } ) } else { - logInfo( + ParallelLogger::logInfo( "There are no mismatched patient IDs between the product data - ", product_file, " and patient data - ", patient_file ) } - logInfo("Finished attempting to link product csv file with patient csv file.") + ParallelLogger::logInfo("Finished attempting to link product csv file with patient csv file.") } diff --git a/R/logger.R b/R/logger.R index 57958bd..e2a111b 100644 --- a/R/logger.R +++ b/R/logger.R @@ -9,17 +9,17 @@ setup_logger <- function(output_dir, log_name) { file.remove(logFileName) } - logger <- createLogger( + logger <- ParallelLogger::createLogger( name = "MAIN", threshold = "TRACE", appenders = list( - createFileAppender( - layout = layoutParallel, + ParallelLogger::createFileAppender( + layout = ParallelLogger::layoutParallel, fileName = logFileName ) ) ) - registerLogger(logger) + ParallelLogger::registerLogger(logger) log_dir <- file.path(output_dir, "logs") @@ -33,14 +33,15 @@ setup_logger <- function(output_dir, log_name) { #' @param output_root Output root directory for the current process. 
#' #' @return returns the loggers that were previously set for usage with with_ +#' @export setup_file_logger <- function(logfile, output_root) { - loggers <- getLoggers() - clearLoggers() + loggers <- ParallelLogger::getLoggers() + ParallelLogger::clearLoggers() logFileName <- file.path(output_root, "logs", paste0(logfile, ".log")) if (file.exists(logFileName)) { file.remove(logFileName) } - addDefaultFileLogger(logFileName, name = logfile) + ParallelLogger::addDefaultFileLogger(logFileName, name = logfile) loggers } @@ -58,9 +59,9 @@ setup_file_logger <- function(logfile, output_root) { with_file_logger <- withr::with_( setup_file_logger, function(loggers) { - clearLoggers() + ParallelLogger::clearLoggers() for (logger in loggers) { - registerLogger(logger) + ParallelLogger::registerLogger(logger) } } ) diff --git a/R/read_cleaned_patient_data.R b/R/read_cleaned_patient_data.R index ae56e34..b217da4 100644 --- a/R/read_cleaned_patient_data.R +++ b/R/read_cleaned_patient_data.R @@ -7,7 +7,7 @@ #' @export read_cleaned_patient_data <- function(input_root, patient_data_files) { - logInfo("Start read_cleaned_patient_data") + ParallelLogger::logInfo("Start read_cleaned_patient_data") patient_data <- patient_data_files %>% purrr::map(function(patient_file) { @@ -17,6 +17,6 @@ read_cleaned_patient_data <- - logInfo("Finish read_cleaned_patient_data") + ParallelLogger::logInfo("Finish read_cleaned_patient_data") patient_data } diff --git a/R/read_patient_data.R b/R/read_patient_data.R index b292f1c..61f6839 100644 --- a/R/read_patient_data.R +++ b/R/read_patient_data.R @@ -1,9 +1,9 @@ reading_patient_data <- function(tracker_data_file, columns_synonyms) { - logDebug("Start reading_patient_data.") + ParallelLogger::logDebug("Start reading_patient_data.") sheet_list <- readxl::excel_sheets(tracker_data_file) testit::assert(length(sheet_list) > 0) - logInfo( + ParallelLogger::logInfo( "Found ", length(sheet_list), " sheets inside the current file = ", @@ -13,7 +13,7 @@ reading_patient_data <- month_list <- sheet_list[na.omit(pmatch(month.abb, sheet_list))] - logInfo( + ParallelLogger::logInfo( "Found ", length(month_list), " month sheets inside the current file = ", @@ -24,18 +24,18 @@ reading_patient_data <- # Extract year year <- get_tracker_year(tracker_data_file, month_list) - logInfo("Tracker year = ", year, ".") + ParallelLogger::logInfo("Tracker year = ", year, ".") testit::assert(year %in% c(2017, 2018, 2019, 2020, 2021, 2022)) tidy_tracker_list <- NULL - logDebug("Start processing sheets.") + ParallelLogger::logDebug("Start processing sheets.") for (curr_sheet in month_list) { - logDebug("Start processing sheet ", curr_sheet, ".") + ParallelLogger::logDebug("Start processing sheet ", curr_sheet, ".") df_patient <- extract_patient_data(tracker_data_file, curr_sheet, year) testit::assert(nrow(df_patient) > 0) - logDebug("df_patient dim: ", dim(df_patient) %>% as.data.frame(), ".") + ParallelLogger::logDebug("df_patient dim: ", dim(df_patient) %>% as.data.frame(), ".") df_patient <- harmonize_patient_data_columns(df_patient, columns_synonyms) @@ -66,15 +66,15 @@ reading_patient_data <- ) tidy_tracker_list[[curr_sheet]] <- df_patient - logDebug("Finish processing sheet ", curr_sheet, ".") + ParallelLogger::logDebug("Finish processing sheet ", curr_sheet, ".") } - logDebug("Start combining sheet data into single data frame.") + ParallelLogger::logDebug("Start combining sheet data into single data frame.") df_raw <- dplyr::bind_rows(tidy_tracker_list) - logDebug("Finish combining sheet data into 
single data frame.") + ParallelLogger::logDebug("Finish combining sheet data into single data frame.") if ("Patient List" %in% sheet_list) { - logDebug("Start extracting patient list.") + ParallelLogger::logDebug("Start extracting patient list.") patient_list <- extract_patient_data( tracker_data_file, "Patient List", @@ -98,9 +98,9 @@ reading_patient_data <- by = "id", relationship = "many-to-one" ) - logDebug("Finish extracting patient list.") + ParallelLogger::logDebug("Finish extracting patient list.") } - logInfo("Finish reading_patient_data.") + ParallelLogger::logInfo("Finish reading_patient_data.") return(df_raw) } diff --git a/R/read_product_data.R b/R/read_product_data.R index 10d8886..d1fc051 100644 --- a/R/read_product_data.R +++ b/R/read_product_data.R @@ -4,7 +4,7 @@ # function based on parts from run_a4d_product_data.R and helper functions reading_product_data_step1 <- function(tracker_data_file, columns_synonyms) { - logDebug("Start reading_product_data_step1.") + ParallelLogger::logDebug("Start reading_product_data_step1.") # rename column names to match colnames(columns_synonyms) <- c("name_clean", "name_to_be_matched") @@ -13,13 +13,13 @@ reading_product_data_step1 <- month_list <- sheet_list[na.omit(pmatch(month.abb, sheet_list))] year <- get_tracker_year(tracker_data_file, month_list) - logDebug("Found ", length(sheet_list), " sheets: ", paste(sheet_list, collapse = ", "), ".") - logDebug("Found ", length(month_list), " month sheets: ", paste(month_list, collapse = ", "), ".") + ParallelLogger::logDebug("Found ", length(sheet_list), " sheets: ", paste(sheet_list, collapse = ", "), ".") + ParallelLogger::logDebug("Found ", length(month_list), " month sheets: ", paste(month_list, collapse = ", "), ".") # loop through all months for (curr_sheet in month_list) { - logDebug("Start processing the following sheet: ", curr_sheet) + ParallelLogger::logDebug("Start processing the following sheet: ", curr_sheet) # open tracker data tracker_data <- data.frame( @@ -37,7 +37,7 @@ reading_product_data_step1 <- grepl("Description of Support", tracker_data[, ]))) ) { # go to next month - logInfo("Could not find product data in tracker data. Skipping ", curr_sheet, ".") + ParallelLogger::logInfo("Could not find product data in tracker data. Skipping ", curr_sheet, ".") next } @@ -46,7 +46,7 @@ reading_product_data_step1 <- # If after extraction, dataframe is empty, this iteration is also skipped. if (all(is.na(product_df))) { - logInfo("Product data is empty. Skipping ", curr_sheet, ".") + ParallelLogger::logInfo("Product data is empty. Skipping ", curr_sheet, ".") next } @@ -69,11 +69,11 @@ reading_product_data_step1 <- tryCatch( { if (num_na_rows > 0) { - logInfo(curr_sheet, " the number of rows where the patient's name is missing: ", col_released, " is not NA and ", col_released_to, " (patient's name) is NA = ", num_na_rows) + ParallelLogger::logInfo(curr_sheet, " the number of rows where the patient's name is missing: ", col_released, " is not NA and ", col_released_to, " (patient's name) is NA = ", num_na_rows) } }, error = function(e) { - logError(curr_sheet, " trying with num_na_rows for products. Error: ", e$message) + ParallelLogger::logError(curr_sheet, " trying with num_na_rows for products. 
Error: ", e$message) } ) @@ -83,7 +83,7 @@ reading_product_data_step1 <- tryCatch( { if (nrow(non_processed_dates) > 0) { - logWarn( + ParallelLogger::logWarn( curr_sheet, " the number of rows with non-processed dates in product_entry_date is ", nrow(non_processed_dates), ": ", @@ -92,7 +92,7 @@ reading_product_data_step1 <- } }, error = function(e) { - logError(curr_sheet, " trying with non_processed_dates in product_entry_date. Error: ", e$message) + ParallelLogger::logError(curr_sheet, " trying with non_processed_dates in product_entry_date. Error: ", e$message) } ) @@ -107,7 +107,7 @@ reading_product_data_step1 <- # Check if the entry dates for the balance match the month/year on the sheet check_entry_dates(product_df, curr_sheet) - logDebug("Finish processing sheet: ", curr_sheet) + ParallelLogger::logDebug("Finish processing sheet: ", curr_sheet) # combine all months if (!exists("df_final")) { @@ -121,7 +121,7 @@ reading_product_data_step1 <- } else { return(NULL) } - logDebug("Finish reading_product_data_step1.") + ParallelLogger::logDebug("Finish reading_product_data_step1.") } @@ -151,7 +151,7 @@ count_na_rows <- function(df, units_released_col, released_to_col) { #' #' @return This function does not return a value. It logs a warning message if there are any dates in 'product_entry_date' that don't match the month/year on the sheet. check_entry_dates <- function(df, Sheet) { - logDebug("Start check_entry_dates.") + ParallelLogger::logDebug("Start check_entry_dates.") # Check if the entry dates for the balance match the month/year on the sheet entry_dates_df <- df %>% dplyr::filter(grepl("^[0-9]+$", product_entry_date)) @@ -167,14 +167,14 @@ check_entry_dates <- function(df, Sheet) { not_same <- entry_dates_df[entry_dates_df$ed_month != entry_dates_df$product_table_month | entry_dates_df$ed_year != entry_dates_df$product_table_year, ] if (nrow(not_same) > 0) { - logWarn( + ParallelLogger::logWarn( Sheet, " the number of dates in product_entry_date that don't match the month/year on the sheet is ", nrow(not_same), ": ", paste(not_same$ed_date, collapse = ", ") ) } - logDebug("Finish check_entry_dates.") + ParallelLogger::logDebug("Finish check_entry_dates.") } #' @title Remove Rows with NA Values in Specified Columns. 
@@ -192,7 +192,7 @@ remove_rows_with_na_columns <- na_rows <- apply(df[column_names], 1, function(x) all(is.na(x))) # log message - logInfo(paste(length(na_rows[na_rows == T]), " rows deleted out of ", nrow(df), " rows (reason: rows not containing additional info).", sep = "")) + ParallelLogger::logInfo(paste(length(na_rows[na_rows == T]), " rows deleted out of ", nrow(df), " rows (reason: rows not containing additional info).", sep = "")) # Return the data frame without the NA rows return(df[!na_rows, ]) @@ -215,7 +215,7 @@ check_negative_balance <- function(df, Sheet) { # Check if there are any rows in the new data frame if (nrow(negative_df) > 0) { # Log a warning message with the number of negative values and their corresponding product_balance values - logWarn( + ParallelLogger::logWarn( Sheet, " number of negative values in product_balance on the sheet is ", nrow(negative_df), ": ", @@ -242,7 +242,7 @@ switch_columns_stock <- "product_units_received" = "product_received_from", "product_received_from" = "product_units_received" ) - logDebug("Columns product_units_received and product_received_from were switched") + ParallelLogger::logDebug("Columns product_units_received and product_received_from were switched") return(df) } else { return(df) @@ -290,14 +290,14 @@ report_unknown_products <- function(df, Sheet, stock_list_df) { # Check if there are any unknown products names if (length(unmatched_products) > 0) { # Log a warning message with the number of unknown products names - logWarn( + ParallelLogger::logWarn( Sheet, " the number of unknown product names on the sheet is ", length(unmatched_products), ": ", paste(unmatched_products, collapse = ", ") ) } else { - logInfo(Sheet, " no unknown product names on the sheet") + ParallelLogger::logInfo(Sheet, " no unknown product names on the sheet") } } @@ -317,16 +317,16 @@ report_unknown_products <- function(df, Sheet, stock_list_df) { #' product_list <- load_product_reference_data("your_file.xlsx") #' } load_product_reference_data <- function(stock_summary_xlsx = "reference_data/master_tracker_variables.xlsx") { - logDebug("Trying to load the product list from the Stock Summary, ", stock_summary_xlsx, "...") + ParallelLogger::logDebug("Trying to load the product list from the Stock Summary, ", stock_summary_xlsx, "...") tryCatch( { product_names_df <- readxl::read_excel(stock_summary_xlsx, "Stock_Summary") colnames(product_names_df) <- tolower(colnames(product_names_df)) - logDebug(nrow(product_names_df), " product names were loaded from the Stock Summary.") + ParallelLogger::logDebug(nrow(product_names_df), " product names were loaded from the Stock Summary.") return(product_names_df) }, error = function(e) { - logError("Error in loading stock product list: ", e) + ParallelLogger::logError("Error in loading stock product list: ", e) } ) } @@ -371,7 +371,7 @@ add_product_categories <- function(inventory_data, product_category_mapping) { #' df <- extract_unit_capacity(df, "product") #' } extract_unit_capacity <- function(df, column_name) { - logDebug("Trying to extract Unit Capacity from ", column_name, " column") + ParallelLogger::logDebug("Trying to extract Unit Capacity from ", column_name, " column") # Extract all symbols between parentheses df$product_unit_capacity <- stringr::str_extract(df[[column_name]], "\\(([^)]+)\\)") @@ -393,7 +393,7 @@ extract_unit_capacity <- function(df, column_name) { # Add 1 to NA values df$product_unit_capacity[is.na(df$product_unit_capacity)] <- 1 - logDebug("Finished extracting Unit Capacity from ", 
column_name, " column") + ParallelLogger::logDebug("Finished extracting Unit Capacity from ", column_name, " column") return(df) } @@ -410,7 +410,7 @@ extract_unit_capacity <- function(df, column_name) { #' @return Cleaned product data for one specified tracker. reading_product_data_step2 <- function(df, columns_synonyms) { - logDebug("Start reading_product_data_step2.") + ParallelLogger::logDebug("Start reading_product_data_step2.") # rename column names to match colnames(columns_synonyms) <- c("name_clean", "name_to_be_matched") @@ -425,7 +425,7 @@ reading_product_data_step2 <- # loop through all months for (sheet_month in unique(df$product_sheet_name)) { - logDebug(paste("Start processing the following sheet:", sheet_month)) + ParallelLogger::logDebug(paste("Start processing the following sheet:", sheet_month)) # filter on month sheet product_df <- df %>% @@ -453,7 +453,7 @@ reading_product_data_step2 <- product_df <- remove_rows_with_na_columns(product_df, column_names_check) # jump to next sheet if dataframe empty from here if (nrow(product_df) == 0) { - logDebug(paste(sheet_month, " sheet is empty after filtering and skipped", sep = "")) + ParallelLogger::logDebug(paste(sheet_month, " sheet is empty after filtering and skipped", sep = "")) next } @@ -527,12 +527,12 @@ reading_product_data_step2 <- df_final <- df_final %>% rbind(product_df) - logDebug(paste("Finished processing the following sheet:", sheet_month)) + ParallelLogger::logDebug(paste("Finished processing the following sheet:", sheet_month)) } if (nrow(df_final) > 0) { return(df_final) } else { - logDebug(paste("No product data extracted for the following tracker:", df$file_name[1])) + ParallelLogger::logDebug(paste("No product data extracted for the following tracker:", df$file_name[1])) } } diff --git a/R/sysdata.rda b/R/sysdata.rda index eb303cf804b77f1badf0b2d97168f0a3f6920e66..be6b002cff04d1329d0fdca86a74da91bb6a0978 100644 GIT binary patch delta 19 XcmX>tdRmlAzMF#q445}^9pnN4Eu91g delta 19 XcmX>tdRmlAzMF#q4A?eu9pnN4Evy6y diff --git a/man/process_patient_file.Rd b/man/process_patient_file.Rd new file mode 100644 index 0000000..69c9a62 --- /dev/null +++ b/man/process_patient_file.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_script_2.R +\name{process_patient_file} +\alias{process_patient_file} +\title{Process and clean patient raw data} +\usage{ +process_patient_file(patient_file, paths, p) +} +\arguments{ +\item{patient_file}{Path to patient raw parquet file from first script.} + +\item{paths}{List with paths to patient_data_cleaned and product_data_cleaned folders.} + +\item{p}{progressor from progressr package.} +} +\description{ +Read in the output of the first script as parquet (all character). +Merge extracted patient data with the metadata schema that contains all +necessary columns. Process each variable in three steps: +\enumerate{ +\item Fix any problems with character representation. +\item Convert column to target data type, for example a date or number. +\item Apply any post-processing logic for the target data type. 
+} +} diff --git a/man/process_product_file.Rd b/man/process_product_file.Rd new file mode 100644 index 0000000..07d01d9 --- /dev/null +++ b/man/process_product_file.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_script_2.R +\name{process_product_file} +\alias{process_product_file} +\title{Process and clean product raw data} +\usage{ +process_product_file(product_file, paths, p) +} +\arguments{ +\item{product_file}{Path to product raw parquet file from first script.} + +\item{paths}{List with paths to patient_data_cleaned and product_data_cleaned folders.} + +\item{p}{progressor from progressr package.} +} +\description{ +Read the raw product parquet file produced by the first script, clean it with reading_product_data_step2, and export the result as parquet. +} diff --git a/man/process_tracker_file.Rd b/man/process_tracker_file.Rd index b9728ae..2453d08 100644 --- a/man/process_tracker_file.Rd +++ b/man/process_tracker_file.Rd @@ -4,14 +4,14 @@ \alias{process_tracker_file} \title{Process a single tracker file and extract patient and product data.} \usage{ -process_tracker_file(paths, tracker_file, tracker_name) +process_tracker_file(tracker_file, paths, p) } \arguments{ -\item{paths}{a list with the paths to the tracker root dir, the patient and product output dir and the root output dir.} +\item{tracker_file}{Filename of the tracker.} -\item{tracker_file}{filename.} +\item{paths}{a list with the paths to the tracker root dir, the patient and product output dir and the root output dir.} -\item{tracker_name}{filename without extension.} +\item{p}{progressor from progressr package.} } \description{ Process a single tracker file and extract patient and product data. diff --git a/reference_data/build_package_data.R b/reference_data/build_package_data.R index 3d34392..7d3dc8c 100644 --- a/reference_data/build_package_data.R +++ b/reference_data/build_package_data.R @@ -1,5 +1,5 @@ cleaning_config <- yaml::read_yaml(here::here("reference_data", "data_cleaning.yaml")) -allowed_provinces <- yaml::read_yaml(here::here("reference_data", "provinces", "allowed_provinces.yaml")) %>% unlist() +allowed_provinces <- yaml::read_yaml(here::here("reference_data", "provinces", "allowed_provinces.yaml")) |> unlist() for (i in seq_along(cleaning_config$province$steps)) { if (cleaning_config$province$steps[[i]]$type == "allowed_values") { diff --git a/scripts/run_script_1_extract_raw_data.R b/scripts/run_script_1_extract_raw_data.R index d4961ca..696e610 100644 --- a/scripts/run_script_1_extract_raw_data.R +++ b/scripts/run_script_1_extract_raw_data.R @@ -1,11 +1,11 @@ options(readxl.show_progress = FALSE) -future::plan(future::multisession, workers = 4) +future::plan("multisession") -Sys.setenv(A4D_DATA_ROOT = "data") paths <- a4d::init_paths(c("patient_data_raw", "product_data_raw"), delete = TRUE) -setup_logger(paths$output_root, "script1") +a4d::setup_logger(paths$output_root, "script1") tracker_files <- a4d::get_files(paths$tracker_root) -logInfo( + +ParallelLogger::logInfo( "Found ", length(tracker_files), " xlsx files under ", @@ -13,7 +13,7 @@ "." 
) -logInfo("Start processing tracker files.") +ParallelLogger::logInfo("Start processing tracker files.") progressr::with_progress({ p <- progressr::progressor(steps = length(tracker_files)) @@ -21,11 +21,11 @@ progressr::with_progress({ result <- furrr::future_map( tracker_files, a4d::process_tracker_file, - paths=paths, - p=p + paths = paths, + p = p ) }) -logInfo("Finish processing all tracker files.") +ParallelLogger::logInfo("Finish processing all tracker files.") -clearLoggers() +ParallelLogger::clearLoggers() diff --git a/scripts/run_script_2_clean_data.R b/scripts/run_script_2_clean_data.R index e20727a..03c36da 100644 --- a/scripts/run_script_2_clean_data.R +++ b/scripts/run_script_2_clean_data.R @@ -1,371 +1,53 @@ options(readxl.show_progress = FALSE) - -ERROR_VAL_NUMERIC <<- 999999 -ERROR_VAL_CHARACTER <<- "Undefined" -ERROR_VAL_DATE <<- "9999-09-09" - -main <- function() { - paths <- init_paths(c("patient_data_cleaned", "product_data_cleaned"), delete = TRUE) - setup_logger(paths$output_root, "script2") - patient_data_files <- get_files(paths$tracker_root, pattern = "patient_raw.parquet$") - product_data_files <- get_files(paths$tracker_root, pattern = "product_raw.parquet$") - logInfo( - "Found ", - length(patient_data_files), - " patient csv files under ", - paths$tracker_root, - "." - ) - logInfo( - "Found ", - length(product_data_files), - " product csv files under ", - paths$tracker_root, - "." - ) - - logInfo("Start processing patient csv files.") - - for (i in seq_along(patient_data_files)) { - patient_file <- patient_data_files[i] - patient_file_name <- tools::file_path_sans_ext(basename(patient_file)) - logfile <- paste0(patient_file_name) - with_file_logger(logfile, - { - tryCatch( - process_patient_file(paths, patient_file, patient_file_name, paths$patient_data_cleaned), - error = function(e) { - logError("Could not process raw patient data. Error = ", e$message, ".") - }, - warning = function(w) { - logWarn("Could not process raw patient data. Warning = ", w$message, ".") - } - ) - }, - output_root = paths$output_root - ) - cat(paste("Processed ", i, " of ", length(patient_data_files), " (", round(i / length(patient_data_files) * 100, 0), "%) patient files.\n")) - } - - logInfo("Finish processing all patient csv files.") - - logDebug("Start processing product csv files.") - synonyms <- get_synonyms() - synonyms_product <- synonyms$product - - for (i in seq_along(product_data_files)) { - product_file <- product_data_files[i] - product_file_name <- tools::file_path_sans_ext(basename(product_file)) - logfile <- paste0(product_file_name) - - with_file_logger(logfile, - { - tryCatch( - process_product_file(paths, product_file, product_file_name, synonyms_product, paths$product_data_cleaned), - error = function(e) { - logError("Could not process raw product data. Error = ", e$message, ".") - }, - warning = function(w) { - logWarn("Could not process raw product data. 
Warning = ", w$message, ".") - } - ) - }, - output_root = paths$output_root - ) - cat(paste("Processed ", i, " of ", length(product_data_files), " (", round(i / length(product_data_files) * 100, 0), "%) product files.\n")) - } - - logInfo("Finish processing all csv files.") -} - - -process_patient_file <- function(paths, patient_file, patient_file_name, output_root) { - patient_file_path <- - file.path(paths$tracker_root, patient_file) - logDebug("Start process_patient_file.") - logInfo( - "Current file: ", - patient_file_name - ) - - allowed_provinces <- get_allowed_provinces() - - df_patient_raw <- arrow::read_parquet(patient_file_path) - - # filter all rows with no patient id or patient name - df_patient_raw <- df_patient_raw %>% - dplyr::filter(!(is.na(id) & is.na(name))) %>% - dplyr::filter(!(id == "0" & name == "0")) - - # --- TRANSFORMATIONS --- - # data before 2019 had only one column for updated hba1c and fbg - # with date as part of the value - if (!"hba1c_updated_date" %in% colnames(df_patient_raw) && "hba1c_updated" %in% colnames(df_patient_raw)) { - logInfo("Column updated_hba1c_date not found. Trying to parse from hba1c_updated.") - df_patient_raw <- - extract_date_from_measurement(df_patient_raw, "hba1c_updated") - logDebug("Finished parsing dates from hba1c_updated.") - } - - if (!"fbg_updated_date" %in% colnames(df_patient_raw) && "fbg_updated_mg" %in% colnames(df_patient_raw)) { - logInfo("Column updated_fbg_date not found. Trying to parse from fbg_updated_mg.") - df_patient_raw <- - extract_date_from_measurement(df_patient_raw, "fbg_updated_mg") - logDebug("Finished parsing dates from fbg_updated_mg.") - } - - if (!"fbg_updated_date" %in% colnames(df_patient_raw) && "fbg_updated_mmol" %in% colnames(df_patient_raw)) { - logInfo("Column fbg_updated_date not found. Trying to parse from fbg_updated_mmol.") - df_patient_raw <- - extract_date_from_measurement(df_patient_raw, "fbg_updated_mmol") - logDebug("Finished parsing dates from fbg_updated_mmol.") - } - - # blood pressure is given as sys/dias value pair, - # so we split this column in two separate columns - if ("blood_pressure_mmhg" %in% colnames(df_patient_raw)) { - df_patient_raw <- split_bp_in_sys_and_dias(df_patient_raw) - } - - # The maximum value available for hba1c will be around 14% - 18%, - # depending on the equipment being used. - # If the reading is above the maximum available value the > sign is used - - # we would prefer to retain this character in the database as it is important for data analysis. 
- logInfo("Adding columns hba1c_baseline_exceeds and hba1c_updated_exceeds.") - df_patient_raw <- df_patient_raw %>% - dplyr::mutate( - hba1c_baseline_exceeds = ifelse(grepl(">|<", hba1c_baseline), TRUE, FALSE), - hba1c_updated_exceeds = ifelse(grepl(">|<", hba1c_updated), TRUE, FALSE) - ) - - # --- META SCHEMA --- - # meta schema has all final columns for the database - # along with their corresponding data types - logInfo("Creating meta schema.") - # short type string for read_csv: - # iiinDccDcnnDnncnlnlDncDccDDDccccDccccciDciiiDn - schema <- tibble::tibble( - # clinic_visit = logical(), - # complication_screening = character(), - # complication_screening_date = lubridate::as_date(1), - # complication_screening_results = character(), - # dm_complication_comment = character(), # TODO - # dm_complication_eye = character(), # TODO - # dm_complication_kidney = character(), # TODO - # dm_complication_other = character(), # TODO - # est_strips_pmonth = integer(), - # family_support_scale = character(), # TODO - # inactive_reason = character(), - # insulin_dosage = character(), - # meter_received_date = lubridate::as_date(1), # TODO - # remarks = character(), - # remote_followup = logical(), - # additional_support = character(), - age = integer(), - blood_pressure_dias_mmhg = integer(), - blood_pressure_sys_mmhg = integer(), - bmi = numeric(), - bmi_date = lubridate::as_date(1), - clinic_code = character(), - country_code = character(), - dob = lubridate::as_date(1), - edu_occ = character(), - fbg_baseline_mg = numeric(), - fbg_baseline_mmol = numeric(), - fbg_updated_date = lubridate::as_date(1), - fbg_updated_mg = numeric(), - fbg_updated_mmol = numeric(), - file_name = character(), - hba1c_baseline = numeric(), - hba1c_baseline_exceeds = logical(), - hba1c_updated = numeric(), - hba1c_updated_exceeds = logical(), - hba1c_updated_date = lubridate::as_date(1), - height = numeric(), - hospitalisation_cause = character(), - hospitalisation_date = lubridate::as_date(1), - id = character(), - insulin_regimen = character(), - last_clinic_visit_date = lubridate::as_date(1), - last_remote_followup_date = lubridate::as_date(1), - lost_date = lubridate::as_date(1), - name = character(), - observations = character(), - observations_category = character(), - province = character(), - recruitment_date = lubridate::as_date(1), - sex = character(), - sheet_name = character(), - status = character(), - status_out = character(), - support_from_a4d = character(), - t1d_diagnosis_age = integer(), - t1d_diagnosis_date = lubridate::as_date(1), - t1d_diagnosis_with_dka = character(), - testing_frequency = integer(), - tracker_date = lubridate::as_date(1), - tracker_month = integer(), - tracker_year = integer(), - updated_2022_date = lubridate::as_date(1), - weight = numeric() - ) - - cols_extra <- colnames(df_patient_raw)[!colnames(df_patient_raw) %in% colnames(schema)] - logWarn("Extra columns in patient data: ", paste(cols_extra, collapse = ", ")) - - cols_missing <- - colnames(schema)[!colnames(schema) %in% colnames(df_patient_raw)] - logWarn("Missing columns in patient data: ", paste(cols_missing, collapse = ", ")) - - # add all columns of schema to df_patient_raw - # keep all rows, only append missing cols - logInfo("Merging df_patient with meta schema and selecting all columns of meta schema.") - df_patient <- merge.default(df_patient_raw, schema, all.x = T) - df_patient <- df_patient[colnames(schema)] - - # the cleaning, fixing and validating happens in three major steps: - # 1. 
make sure we fix any known problems in the raw character columns - df_patient <- - df_patient %>% - dplyr::rowwise() %>% - # 1. handle known problems before converting to target type - dplyr::mutate( - t1d_diagnosis_age = fix_t1d_diagnosis_age(t1d_diagnosis_age, id), - hba1c_baseline = stringr::str_replace(hba1c_baseline, "<|>", ""), - hba1c_updated = stringr::str_replace(hba1c_updated, "<|>", ""), - fbg_baseline_mg = fix_fbg(fbg_baseline_mg), - fbg_baseline_mmol = fix_fbg(fbg_baseline_mmol), - fbg_updated_mg = fix_fbg(fbg_updated_mg), - fbg_updated_mmol = fix_fbg(fbg_updated_mmol), - testing_frequency = fix_testing_frequency(testing_frequency) - ) %>% - # 2. convert the refined character columns into the target data type - dplyr::mutate( - dplyr::across( - schema %>% dplyr::select(tidyselect::where(is.numeric)) %>% names(), - \(x) convert_to(correct_decimal_sign(x), as.numeric, ERROR_VAL_NUMERIC, dplyr::cur_column(), id = id) - ), - dplyr::across( - schema %>% dplyr::select(tidyselect::where(is.logical)) %>% names(), - \(x) convert_to(x, as.logical, FALSE, dplyr::cur_column(), id = id) - ), - dplyr::across( - schema %>% dplyr::select(tidyselect::where(lubridate::is.Date)) %>% names(), - \(x) convert_to(fix_digit_date(x), parse_dates, as.Date(ERROR_VAL_DATE), dplyr::cur_column(), id = id) - ), - dplyr::across( - schema %>% dplyr::select(tidyselect::where(is.integer)) %>% names(), - \(x) convert_to(x, function(x) as.integer(round(as.double(x))), ERROR_VAL_NUMERIC, dplyr::cur_column(), id = id) - ) - ) %>% - # 3. fix remaining problems in the target data type - dplyr::mutate( - # height and weight are needed to calculate bmi - height = transform_cm_to_m(height) %>% - cut_numeric_value(min = 0, max = 2.3, col_name = "height"), - weight = cut_numeric_value(weight, min = 0, max = 200, col_name = "weight"), - bmi = fix_bmi(weight, height, id) %>% - cut_numeric_value(min = 4, max = 60, "bmi"), - age = fix_age(age, dob, tracker_year, tracker_month, id) %>% - cut_numeric_value(min = 0, max = 25, "age"), - sex = fix_sex(sex, id), - hba1c_baseline = cut_numeric_value(hba1c_baseline, min = 4, max = 18, "hba1c_baseline"), - # https://www.cleveland19.com/story/1425584/ohio-man-holds-world-record-of-highest-blood-sugar/ - fbg_baseline_mmol = cut_numeric_value(fbg_baseline_mmol, min = 0, max = 136.5, "fbg_baseline_mmol"), - # https://www.cleveland19.com/story/1425584/ohio-man-holds-world-record-of-highest-blood-sugar/ - fbg_updated_mmol = cut_numeric_value(fbg_updated_mmol, min = 0, max = 136.5, "fbg_updated_mmol"), - blood_pressure_sys_mmhg = cut_numeric_value(blood_pressure_sys_mmhg, min = 20, max = 250, "blood_pressure_sys_mmhg"), - blood_pressure_dias_mmhg = cut_numeric_value(blood_pressure_dias_mmhg, min = 20, max = 220, "blood_pressure_dias_mmhg"), - tracker_date = lubridate::ym(paste(tracker_year, tracker_month, sep="-")), - !!!parse_character_cleaning_config(a4d:::config$cleaning), - # should be fixed last as other fix functions use id to log invalid rows! 
-      id = fix_id(id)
-    ) %>%
-    dplyr::ungroup()
-
-  # add clinic and country code after having fixed all issues with patient id
-  cc_codes <- extract_country_clinic_code(df_patient)
-  df_patient["clinic_code"] <- cc_codes$clinic_code
-  df_patient["country_code"] <- cc_codes$country_code
-
-  # Formula to calculate mmol/l from mg/dl: mmol/l = mg/dl / 18
-  if (all(is.na(df_patient$fbg_baseline_mmol))) {
-    df_patient <- df_patient %>%
-      dplyr::mutate(fbg_baseline_mmol = dplyr::case_when(
-        fbg_baseline_mg != ERROR_VAL_NUMERIC ~ fbg_baseline_mg / 18
-      ))
-  }
-  if (all(is.na(df_patient$fbg_updated_mmol))) {
-    df_patient <- df_patient %>%
-      dplyr::mutate(fbg_updated_mmol = dplyr::case_when(
-        fbg_updated_mg != ERROR_VAL_NUMERIC ~ fbg_updated_mg / 18
-      ))
-  }
-
-  # Formula to calculate mg/dl from mmol/l: mg/dl = 18 × mmol/l
-  if (all(is.na(df_patient$fbg_baseline_mg))) {
-    df_patient <- df_patient %>%
-      dplyr::mutate(fbg_baseline_mg = dplyr::case_when(
-        fbg_baseline_mmol != ERROR_VAL_NUMERIC ~ fbg_baseline_mmol * 18
-      ))
-  }
-  if (all(is.na(df_patient$fbg_updated_mg))) {
-    df_patient <- df_patient %>%
-      dplyr::mutate(fbg_updated_mg = dplyr::case_when(
-        fbg_updated_mmol != ERROR_VAL_NUMERIC ~ fbg_updated_mmol * 18
-      ))
-  }
-
-  # sort by year and month like it is in the tracker files
-  df_patient <- df_patient %>%
-    dplyr::arrange(tracker_date, id)
-
-  logDebug(
-    "df_patient dim: ",
-    dim(df_patient) %>% as.data.frame(),
-    "."
-  )
-
-  export_data_as_parquet(
-    data = df_patient,
-    filename = stringr::str_replace(patient_file_name, "_patient_raw", ""),
-    output_root = output_root,
-    suffix = "_patient_cleaned"
-  )
-
-  logInfo("Finish process_patient_file.")
-}
-
-
-process_product_file <- function(paths, product_file, product_file_name, synonyms_product, output_root) {
-  product_file_path <-
-    file.path(paths$tracker_root, product_file)
-  logDebug("Start process_product_file.")
-  logInfo(
-    "Current file: ",
-    product_file_name
-  )
-
-  df_product_raw <- arrow::read_parquet(product_file_path)
-
-  df_product_raw <- reading_product_data_step2(df_product_raw, synonyms_product)
-
-  logDebug(
-    "df_product_raw dim: ",
-    dim(df_product_raw) %>% as.data.frame(),
-    "."
-  )
-
-  export_data_as_parquet(
-    data = df_product_raw,
-    filename = stringr::str_replace(product_file_name, "_product_raw", ""),
-    output_root = output_root,
-    suffix = "_product_cleaned"
-  )
-
-  logInfo("Finish process_product_file.")
-}
-
-main()
-
-clearLoggers()
+future::plan("multisession")
+
+paths <- a4d::init_paths(c("patient_data_cleaned", "product_data_cleaned"), delete = TRUE)
+a4d::setup_logger(paths$output_root, "script2")
+patient_data_files <- a4d::get_files(paths$tracker_root, pattern = "patient_raw.parquet$")
+product_data_files <- a4d::get_files(paths$tracker_root, pattern = "product_raw.parquet$")
+ParallelLogger::logInfo(
+    "Found ",
+    length(patient_data_files),
+    " patient parquet files under ",
+    paths$tracker_root,
+    "."
+)
+ParallelLogger::logInfo(
+    "Found ",
+    length(product_data_files),
+    " product parquet files under ",
+    paths$tracker_root,
+    "."
+)
+
+ParallelLogger::logInfo("Start processing patient parquet files.")
+
+progressr::with_progress({
+    p <- progressr::progressor(steps = length(patient_data_files))
+
+    result <- furrr::future_map(
+        patient_data_files,
+        a4d::process_patient_file,
+        paths = paths,
+        p = p
+    )
+})
+
+ParallelLogger::logInfo("Finish processing all patient parquet files.")
+
+ParallelLogger::logDebug("Start processing product parquet files.")
+
+progressr::with_progress({
+    p <- progressr::progressor(steps = length(product_data_files))
+
+    result <- furrr::future_map(
+        product_data_files,
+        a4d::process_product_file,
+        paths = paths,
+        p = p
+    )
+})
+
+ParallelLogger::logInfo("Finish processing all parquet files.")
+
+ParallelLogger::clearLoggers()
diff --git a/scripts/run_script_3_create_tables.R b/scripts/run_script_3_create_tables.R
index 7962867..4cd6216 100644
--- a/scripts/run_script_3_create_tables.R
+++ b/scripts/run_script_3_create_tables.R
@@ -9,14 +9,14 @@ main <- function() {
   setup_logger(paths$output_root, "script3")
   patient_data_files <- get_files(file.path(paths$output_root, "patient_data_cleaned"), pattern = "\\.parquet$")
   product_data_files <- get_files(file.path(paths$output_root, "product_data_cleaned"), pattern = "\\.parquet$")
-  logInfo(
+  ParallelLogger::logInfo(
     "Found ",
     length(patient_data_files),
     " patient csv files under ",
     paths$tracker_root,
     "."
   )
-  logInfo(
+  ParallelLogger::logInfo(
     "Found ",
     length(product_data_files),
     " product csv files under ",
@@ -24,7 +24,7 @@ main <- function() {
     "."
   )

-  logInfo("Start creating table csv files.")
+  ParallelLogger::logInfo("Start creating table csv files.")

   logfile <- "table_patient_data_static"
   with_file_logger(logfile,
@@ -34,10 +34,10 @@
         create_table_patient_data_static(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables)
       },
       error = function(e) {
-        logError("Could not create table csv for static patient data. Error: ", e$message)
+        ParallelLogger::logError("Could not create table csv for static patient data. Error: ", e$message)
       },
       warning = function(w) {
-        logWarn("Could not create table csv for static patient data. Error: ", w$message)
+        ParallelLogger::logWarn("Could not create table csv for static patient data. Error: ", w$message)
       }
     )
   },
@@ -52,10 +52,10 @@
         create_table_patient_data_monthly(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables)
       },
       error = function(e) {
-        logError("Could not create table csv for monthly patient data. Error: ", e$message)
+        ParallelLogger::logError("Could not create table csv for monthly patient data. Error: ", e$message)
       },
       warning = function(w) {
-        logWarn("Could not create table csv for monthly patient data. Error: ", w$message)
+        ParallelLogger::logWarn("Could not create table csv for monthly patient data. Error: ", w$message)
       }
     )
   },
@@ -76,10 +76,10 @@
       )
     },
     error = function(e) {
-      logError("Could not create table csv for longitudinal patient data. Error: ", e$message)
+      ParallelLogger::logError("Could not create table csv for longitudinal patient data. Error: ", e$message)
    },
     warning = function(w) {
-      logWarn("Could not create table csv for longitudinal patient data. Error: ", w$message)
+      ParallelLogger::logWarn("Could not create table csv for longitudinal patient data.
Error: ", w$message) } ) }, @@ -94,19 +94,19 @@ main <- function() { create_table_product_data(file.path(paths$output_root, "product_data_cleaned"), paths$tables) }, error = function(e) { - logError("Could not create table for product data. Error: ", e$message) + ParallelLogger::logError("Could not create table for product data. Error: ", e$message) }, warning = function(w) { - logWarn("Could not create table for product data. Warning: ", w$message) + ParallelLogger::logWarn("Could not create table for product data. Warning: ", w$message) } ) }, output_root = paths$output_root ) - logInfo("Finish creating table files.") + ParallelLogger::logInfo("Finish creating table files.") - logInfo("Trying to link files for product and patient data.") + ParallelLogger::logInfo("Trying to link files for product and patient data.") logfile <- "link_product_patient_data" @@ -120,17 +120,17 @@ main <- function() { ) }, error = function(e) { - logError("Could not link files for product and patient data. Error: ", e$message) + ParallelLogger::logError("Could not link files for product and patient data. Error: ", e$message) }, warning = function(w) { - logWarn("Could not link files for product and patient data. Warning: ", w$message) + ParallelLogger::logWarn("Could not link files for product and patient data. Warning: ", w$message) } ) }, output_root = paths$output_root ) - logInfo("Finished linking files for product and patient data.") + ParallelLogger::logInfo("Finished linking files for product and patient data.") } main() From fef6e826915692ae4ff3baba5b88010517469849 Mon Sep 17 00:00:00 2001 From: Michael Aydinbas Date: Sat, 30 Dec 2023 16:05:59 +0100 Subject: [PATCH 4/8] working version for script 1 and 2 --- DESCRIPTION | 5 +- NAMESPACE | 1 + R/create_table_patient_data.R | 6 +- R/create_table_patient_data_changes_only.R | 6 +- R/create_table_patient_data_static.R | 6 +- R/create_table_product_data.R | 43 ++-- R/helper_main.R | 17 +- R/helper_patient_data_fix.R | 38 ++-- R/helper_product_data.R | 12 +- R/helper_read_patient_data.R | 32 +-- R/{helper_scipt_1.R => helper_script_1.R} | 38 ++-- R/helper_script_2.R | 66 +++--- R/link_product_patient.R | 13 +- R/read_cleaned_patient_data.R | 4 +- R/read_patient_data.R | 26 +-- R/read_product_data.R | 60 +++--- man/process_patient_data.Rd | 2 +- man/process_patient_file.Rd | 4 +- man/process_product_data.Rd | 2 +- man/process_product_file.Rd | 4 +- man/process_tracker_file.Rd | 8 +- renv.lock | 92 ++++---- scripts/pre_commit.R | 2 + scripts/run_script_1_extract_raw_data.R | 34 +-- scripts/run_script_2_clean_data.R | 60 ++++-- scripts/run_script_3_create_tables.R | 237 ++++++++++----------- 26 files changed, 430 insertions(+), 388 deletions(-) rename R/{helper_scipt_1.R => helper_script_1.R} (76%) diff --git a/DESCRIPTION b/DESCRIPTION index c273a8f..1ad85e1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,6 +9,7 @@ Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Depends: + ParallelLogger Imports: data.table, digest, @@ -16,7 +17,6 @@ Imports: here, lubridate, openxlsx, - ParallelLogger, readr, readxl, stringr, @@ -27,7 +27,8 @@ Imports: zoo, arrow, furrr, - progressr + progressr, + tictoc Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index ca93149..298a603 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export(fix_id) export(fix_sex) export(fix_t1d_diagnosis_age) export(fix_testing_frequency) +export(get_allowed_provinces) export(get_files) export(get_synonyms) export(get_tracker_year) 
diff --git a/R/create_table_patient_data.R b/R/create_table_patient_data.R index c6793ed..3eec539 100644 --- a/R/create_table_patient_data.R +++ b/R/create_table_patient_data.R @@ -8,8 +8,10 @@ #' @param patient_data_files list of CSV files with cleaned patient data from step 2. #' @param input_root root directory of the input CSV files. #' @param output_root root directory of the output folder. +#' +#' @export create_table_patient_data_monthly <- function(patient_data_files, input_root, output_root) { - ParallelLogger::logInfo("Start creating single csv for table patient_data_monthly.") + logInfo("Start creating single csv for table patient_data_monthly.") # THERE MIGHT BE MONTHLY COLUMNS MISSING - PLEASE ADD THEM dynamic_patient_columns <- @@ -58,5 +60,5 @@ create_table_patient_data_monthly <- function(patient_data_files, input_root, ou suffix = "" ) - ParallelLogger::logInfo("Finish creating single csv for table patient_data_monthly.") + logInfo("Finish creating single csv for table patient_data_monthly.") } diff --git a/R/create_table_patient_data_changes_only.R b/R/create_table_patient_data_changes_only.R index 1a4178c..8c28365 100644 --- a/R/create_table_patient_data_changes_only.R +++ b/R/create_table_patient_data_changes_only.R @@ -10,13 +10,15 @@ #' @param output_root root directory of the output folder. #' @param variable name of the column that should be exported. #' @param name name used to create the export file name. +#' +#' @export create_table_longitudinal_data <- function(patient_data_files, input_root, output_root, variable, name) { - ParallelLogger::logInfo("Start creating single csv for table create_table_longitudinal_data and variable ", variable) + logInfo("Start creating single csv for table create_table_longitudinal_data and variable ", variable) dynamic_patient_columns <- c( @@ -79,5 +81,5 @@ create_table_longitudinal_data <- suffix = "" ) - ParallelLogger::logInfo("Finish creating single csv for table create_table_longitudinal_data and variable ", variable) + logInfo("Finish creating single csv for table create_table_longitudinal_data and variable ", variable) } diff --git a/R/create_table_patient_data_static.R b/R/create_table_patient_data_static.R index 55c8c1d..cd4e56b 100644 --- a/R/create_table_patient_data_static.R +++ b/R/create_table_patient_data_static.R @@ -8,8 +8,10 @@ #' @param patient_data_files list of CSV files with cleaned patient data from step 2. #' @param input_root root directory of the input CSV files. #' @param output_root root directory of the output folder. +#' +#' @export create_table_patient_data_static <- function(patient_data_files, input_root, output_root) { - ParallelLogger::logInfo("Start creating single csv for table patient_data_static.") + logInfo("Start creating single csv for table patient_data_static.") # THERE MIGHT BE STATIC COLUMNS MISSING - PLEASE ADD THEM static_patient_columns <- @@ -59,5 +61,5 @@ create_table_patient_data_static <- function(patient_data_files, input_root, out suffix = "_static" ) - ParallelLogger::logInfo("Finish creating single csv for table patient_data_static.") + logInfo("Finish creating single csv for table patient_data_static.") } diff --git a/R/create_table_product_data.R b/R/create_table_product_data.R index 4ee7eaa..e88b09d 100644 --- a/R/create_table_product_data.R +++ b/R/create_table_product_data.R @@ -11,13 +11,14 @@ #' #' @return This function does not return a value. 
It writes the merged data to a new CSV file
 #' (with reordered columns according to the list of fields) in the output_root directory.
+#' @export
 #'
 #' @examples
 #' \dontrun{
 #' create_table_product_data("path/to/input/directory", "path/to/output/directory")
 #' }
 create_table_product_data <- function(input_root, output_root) {
-  ParallelLogger::logInfo("Start creating single file for table product_data.")
+  logInfo("Start creating single file for table product_data.")

   # Get a list of all CSV files in the input_root directory
   files <- list.files(input_root, pattern = "*.parquet", full.names = TRUE)
@@ -25,7 +26,7 @@ create_table_product_data <- function(input_root, output_root) {
   # Read all CSV files and store them in a list
   data_list <- lapply(files, function(x) arrow::read_parquet(x))

-  ParallelLogger::logInfo(length(data_list), " files will be processed for creating the single file for table product_data.")
+  logInfo(length(data_list), " files will be processed for creating the single file for table product_data.")

   # Get the union of all column names
   all_names <- unique(unlist(lapply(data_list, colnames)))
@@ -39,25 +40,25 @@
   # Merge all data frames
   merged_data <- do.call(rbind, data_list)

-  ParallelLogger::logDebug("Copying original parient IDs...")
+  logDebug("Copying original patient IDs...")
   merged_data$orig_product_released_to <- merged_data$product_released_to

-  ParallelLogger::logDebug("Trying to fix patient IDs...")
+  logDebug("Trying to fix patient IDs...")
   merged_data$product_released_to <- sapply(merged_data$product_released_to, fix_id)

-  ParallelLogger::logDebug("Extracting product_county and product_hospisal from patients IDs...")
+  logDebug("Extracting product_country and product_hospital from patient IDs...")
   merged_data <- id_2_county_hospisal(
     merged_data,
     "product_released_to", "product_country", "product_hospital"
   )

-  ParallelLogger::logDebug("Calculating the most frequent 'product_hospital' for each 'file_name'...")
+  logDebug("Calculating the most frequent 'product_hospital' for each 'file_name'...")
   tryCatch(
     {
       merged_data <- calculate_most_frequent(merged_data, "file_name", "product_hospital", "table_hospital")
     },
     error = function(e) {
-      ParallelLogger::logError("Error in calculating the most frequent 'product_hospital': ", e)
+      logError("Error in calculating the most frequent 'product_hospital': ", e)
     }
   )

@@ -67,14 +68,14 @@
       merged_data <- calculate_most_frequent(merged_data, "file_name", "product_country", "table_country")
     },
     error = function(e) {
-      ParallelLogger::logError("Error in calculating the most frequent 'product_country': ", e)
+      logError("Error in calculating the most frequent 'product_country': ", e)
     }
   )

   # Reorder, add, and ensures the correct data type for each column according to the list of fields
   merged_data <- preparing_product_fields(merged_data)

-  ParallelLogger::logDebug("Checking 'table_country' for each 'file_name'...")
+  logDebug("Checking 'table_country' for each 'file_name'...")
   report_empty_intersections(merged_data, "file_name", "table_country")

   # Write the merged and processed data to a file in the output_root directory
@@ -85,7 +86,7 @@
     suffix = ""
   )

-  ParallelLogger::logInfo("Finish creating single file for table product_data.")
+  logInfo("Finish creating single file for table product_data.")
 }


@@ -168,7 +169,7 @@ preparing_product_fields <-
function(merged_data) {
     "table_hospital" = "character"
   )

-  ParallelLogger::logInfo("Start processing fields for the single csv product_data...")
+  logInfo("Start processing fields for the single csv product_data...")

   # Check if all fields are present in merged_data
   missing_fields <- setdiff(names(fields), names(merged_data))
@@ -186,7 +187,7 @@ preparing_product_fields <- function(merged_data) {
         merged_data[[field]] <- suppressWarnings(as.Date(original_values))
         incorrect_rows <- which(is.na(merged_data[[field]]) & !is.na(original_values))
         if (length(incorrect_rows) > 0) {
-          ParallelLogger::logWarn(paste(
+          logWarn(paste(
             "In", field, "incorrect date values were replaced with",
             ERROR_VAL_DATE, "in", length(incorrect_rows), "rows:",
             paste(incorrect_rows, collapse = ", ")
@@ -198,7 +199,7 @@
         merged_data[[field]] <- suppressWarnings(as.numeric(original_values))
         incorrect_rows <- which(is.na(merged_data[[field]]) & !is.na(original_values))
         if (length(incorrect_rows) > 0) {
-          ParallelLogger::logWarn(paste(
+          logWarn(paste(
             "In", field, "incorrect numeric values were replaced with",
             ERROR_VAL_NUMERIC, "in", length(incorrect_rows), "rows:",
             paste(incorrect_rows, collapse = ", ")
@@ -210,7 +211,7 @@
         merged_data[[field]] <- suppressWarnings(as.integer(original_values))
         incorrect_rows <- which(is.na(merged_data[[field]]) & !is.na(original_values))
         if (length(incorrect_rows) > 0) {
-          ParallelLogger::logWarn(paste(
+          logWarn(paste(
             "In", field, "incorrect integer values were replaced with",
             ERROR_VAL_NUMERIC, "in", length(incorrect_rows), "rows:",
             paste(incorrect_rows, collapse = ", ")
@@ -222,7 +223,7 @@
         merged_data[[field]] <- as.character(original_values)
         incorrect_rows <- which(is.na(merged_data[[field]]) & !is.na(original_values))
         if (length(incorrect_rows) > 0) {
-          ParallelLogger::logWarn(paste(
+          logWarn(paste(
             "In", field, "incorrect character values were replaced with",
             ERROR_VAL_CHARACTER, "in", length(incorrect_rows), "rows:",
             paste(incorrect_rows, collapse = ", ")
@@ -231,19 +232,19 @@
       }
     }, warning = function(w) {
-      ParallelLogger::logError(paste("Warning in converting", field, ": ", w))
+      logWarn(paste("Warning in converting", field, ": ", w))
     }, error = function(e) {
-      ParallelLogger::logWarn(paste("Error in converting", field, ": ", e))
+      logError(paste("Error in converting", field, ": ", e))
     }, finally = {
-      ParallelLogger::logDebug(paste("Finished converting", field))
+      logDebug(paste("Finished converting", field))
     })
   }

   # Reorder the columns according to the list of fields
-  ParallelLogger::logInfo("Reorder the columns according to the list of fields...")
+  logInfo("Reorder the columns according to the list of fields...")
   merged_data <- merged_data[, c(names(fields), setdiff(names(merged_data), names(fields)))]

-  ParallelLogger::logInfo("Finished processing fields for the single csv product_data.")
+  logInfo("Finished processing fields for the single csv product_data.")

   return(merged_data)
 }
@@ -313,7 +314,7 @@ report_empty_intersections <- function(df, row_category, col_category) {
   df_row_sums <- df_row_sums[df_row_sums$sum == 0, ]

   if (nrow(df_row_sums) > 0) {
-    ParallelLogger::logWarn(
+    logWarn(
       "The number of ", row_category, " with empty ", col_category, " is ",
       nrow(df_row_sums), ": ",
       paste(df_row_sums$row_name, sep = "", collapse = ", ")
diff --git a/R/helper_main.R
b/R/helper_main.R index b5cd0a1..97bd8c0 100644 --- a/R/helper_main.R +++ b/R/helper_main.R @@ -139,7 +139,7 @@ read_column_synonyms <- function(synonym_file, path_prefixes = c("reference_data #' ) #' } export_data <- function(data, filename, output_root, suffix) { - ParallelLogger::logDebug("Start export_data. Suffix = ", suffix, ".") + logDebug("Start export_data. Suffix = ", suffix, ".") data %>% write.csv( file = @@ -156,7 +156,7 @@ export_data <- function(data, filename, output_root, suffix) { fileEncoding = "UTF-16LE", quote = T ) - ParallelLogger::logInfo("Finish export_data. Suffix = ", suffix, ".") + logInfo("Finish export_data. Suffix = ", suffix, ".") } @@ -177,12 +177,12 @@ export_data <- function(data, filename, output_root, suffix) { #' ) #' } export_data_as_parquet <- function(data, filename, output_root, suffix) { - ParallelLogger::logDebug("Start export_data. Suffix = ", suffix, ".") + logDebug("Start export_data. Suffix = ", suffix, ".") data %>% arrow::write_parquet( sink = file.path(output_root, paste0(filename, suffix, ".parquet")), ) - ParallelLogger::logInfo("Finish export_data. Suffix = ", suffix, ".") + logInfo("Finish export_data. Suffix = ", suffix, ".") } @@ -193,7 +193,7 @@ export_data_as_parquet <- function(data, filename, output_root, suffix) { #' #' @return tibble with patient data read_raw_csv <- function(file) { - ParallelLogger::logDebug("Start reading data with read_csv.") + logDebug("Start reading data with read_csv.") df_patient_raw <- readr::read_csv( file, name_repair = "check_unique", @@ -202,9 +202,9 @@ read_raw_csv <- function(file) { col_types = readr::cols(.default = "c"), locale = readr::locale(encoding = "UTF-16LE") ) - ParallelLogger::logDebug("Finished loading data with read_csv.") - ParallelLogger::logInfo("Dim: ", dim(df_patient_raw)) - ParallelLogger::logInfo("Columns: ", spec(df_patient_raw)) + logDebug("Finished loading data with read_csv.") + logInfo("Dim: ", dim(df_patient_raw)) + logInfo("Columns: ", spec(df_patient_raw)) df_patient_raw } @@ -216,6 +216,7 @@ read_raw_csv <- function(file) { #' Read in all provinces from a YAML file inside the provinces folder. #' #' @return A named character vector with all allowed provinces. +#' @export get_allowed_provinces <- function() { ## Should new countries and provinces be added, update the YAML file provinces <- yaml::read_yaml("reference_data/provinces/allowed_provinces.yaml") |> unlist() diff --git a/R/helper_patient_data_fix.R b/R/helper_patient_data_fix.R index 5b10d72..05bb168 100644 --- a/R/helper_patient_data_fix.R +++ b/R/helper_patient_data_fix.R @@ -24,11 +24,11 @@ convert_to <- function(x, cast_fnc, error_val, col_name = "", id = "") { x <- tryCatch( cast_fnc(x), error = function(e) { - ParallelLogger::logError("Could not convert value ", x, " in column ", col_name, " for patient: ", id) + logError("Could not convert value ", x, " in column ", col_name, " for patient: ", id) x <- error_val }, warning = function(w) { - ParallelLogger::logWarn("Could not convert value ", x, " in column ", col_name, " for patient: ", id) + logWarn("Could not convert value ", x, " in column ", col_name, " for patient: ", id) x <- error_val } ) @@ -55,7 +55,7 @@ cut_numeric_value <- function(x, } if (x < min || x > max) { - ParallelLogger::logWarn( + logWarn( "Found invalid value ", x, " for column ", col_name, " outside [", min, ", ", max, "]. ", "Value was replaced with ", ERROR_VAL_NUMERIC, "." 
) @@ -154,7 +154,7 @@ parse_dates <- function(date) { parsed_date <- suppressWarnings(lubridate::as_date(date)) if (is.na(parsed_date)) { - ParallelLogger::logWarn( + logWarn( "Could not parse date value ", date, ". ", "Trying to parse with lubridate::parse_date_time and orders = c('dmy', 'dmY', 'by', 'bY')." ) @@ -195,9 +195,9 @@ check_allowed_values <- function(x, valid_values, id, replace_invalid = TRUE, er valid_value_mapping <- setNames(as.list(valid_values), sanitize_str(valid_values)) if (!sanitize_str(x) %in% names(valid_value_mapping)) { - ParallelLogger::logWarn("Patient ", id, ": Value ", x, " for column ", col, " is not in the list of allowed values. ") + logWarn("Patient ", id, ": Value ", x, " for column ", col, " is not in the list of allowed values. ") if (replace_invalid) { - ParallelLogger::logInfo("Replacing ", x, " with ", error_val, ".") + logInfo("Replacing ", x, " with ", error_val, ".") return(error_val) } else { return(x) @@ -275,13 +275,13 @@ fix_age <- function(age, dob, tracker_year, tracker_month, id) { } if (is.na(age)) { - ParallelLogger::logWarn( + logWarn( "Patient ", id, ": age is missing. Using calculated age ", calc_age, " instead of original age." ) } else { if (calc_age != age) { - ParallelLogger::logWarn( + logWarn( "Patient ", id, ": age ", age, " is different from calculated age ", calc_age, ". Using calculated age instead of original age." ) @@ -289,7 +289,7 @@ fix_age <- function(age, dob, tracker_year, tracker_month, id) { } if (calc_age < 0) { - ParallelLogger::logWarn("Patient ", id, ": calculated age is negative. Something went wrong.") + logWarn("Patient ", id, ": calculated age is negative. Something went wrong.") calc_age <- ERROR_VAL_NUMERIC } } @@ -322,11 +322,11 @@ fix_bmi <- function(weight, height, id) { if (!is.na(weight) && weight == ERROR_VAL_CHARACTER) { - ParallelLogger::logWarn("Patient ", id, ": the weight is out of bounds.") + logWarn("Patient ", id, ": the weight is out of bounds.") } if (!is.na(height) && height == ERROR_VAL_CHARACTER) { - ParallelLogger::logWarn("Patient ", id, ": the height is out of bounds.") + logWarn("Patient ", id, ": the height is out of bounds.") } bmi } @@ -353,7 +353,7 @@ fix_sex <- function(sex, id) { ) if (!is.na(fixed_sex) && fixed_sex == ERROR_VAL_CHARACTER) { - ParallelLogger::logWarn("Patient ", id, ": sex ", sex, " is not in the list of synonyms. Replacing it with ", fixed_sex, ".") + logWarn("Patient ", id, ": sex ", sex, " is not in the list of synonyms. Replacing it with ", fixed_sex, ".") } fixed_sex } @@ -463,7 +463,7 @@ fix_testing_frequency <- function(test_frq) { } if (grepl("-", test_frq, fixed = TRUE)) { - ParallelLogger::logInfo("Found a range for testing_frequency. Replacing it with the mean.") + logInfo("Found a range for testing_frequency. Replacing it with the mean.") test_frq <- try(as.character(replace_range_with_mean(test_frq), silent = TRUE)) } @@ -492,7 +492,7 @@ replace_range_with_mean <- function(x) { #' #' @return data frame with two new columns: blood_pressure_sys_mmhg and blood_pressure_dias_mmhg. 
split_bp_in_sys_and_dias <- function(df) { - ParallelLogger::logInfo("Splitting blood_pressure_mmhg into blood_pressure_sys_mmhg and blood_pressure_dias_mmhg.") + logInfo("Splitting blood_pressure_mmhg into blood_pressure_sys_mmhg and blood_pressure_dias_mmhg.") df <- df %>% dplyr::mutate( blood_pressure_mmhg = dplyr::case_when( @@ -502,7 +502,7 @@ split_bp_in_sys_and_dias <- function(df) { ) if (paste(ERROR_VAL_NUMERIC, ERROR_VAL_NUMERIC, sep = "/") %in% df$blood_pressure_mmhg) { - ParallelLogger::logWarn( + logWarn( "Found invalid values for column blood_pressure_mmhg that do not follow the format X/Y. ", "Values were replaced with ", ERROR_VAL_NUMERIC, "." ) @@ -514,7 +514,7 @@ split_bp_in_sys_and_dias <- function(df) { delim = "/", names = c("blood_pressure_sys_mmhg", "blood_pressure_dias_mmhg"), ) - ParallelLogger::logDebug("Finished splitting blood_pressure_mmhg into blood_pressure_sys_mmhg and blood_pressure_dias_mmhg.") + logDebug("Finished splitting blood_pressure_mmhg into blood_pressure_sys_mmhg and blood_pressure_dias_mmhg.") df } @@ -550,12 +550,12 @@ fix_id <- function(id) { id <- stringr::str_replace(id, "-", "_") if (!grepl("^[[:upper:]]{2}_[[:upper:]]{2}[[:digit:]]{3}$", id)) { - ParallelLogger::logWarn("Patient ", id, ": id cannot be matched to a 7 letter alpha numeric code like XX_YY001. ") + logWarn("Patient ", id, ": id cannot be matched to a 7 letter alpha numeric code like XX_YY001. ") if (stringr::str_length(id) > 8) { - ParallelLogger::logWarn("Patient ", id, ": id was truncated because it is longer than 8 characters.") + logWarn("Patient ", id, ": id was truncated because it is longer than 8 characters.") id <- stringr::str_sub(id, 1, 8) } else { - ParallelLogger::logError("Patient ", id, ": id is not valid.") + logError("Patient ", id, ": id is not valid.") id <- ERROR_VAL_CHARACTER } } diff --git a/R/helper_product_data.R b/R/helper_product_data.R index 3e56f6b..6f04cd7 100644 --- a/R/helper_product_data.R +++ b/R/helper_product_data.R @@ -83,7 +83,7 @@ get_patient_end <- function(df, j) { # @Description: Reads product data from a monthly file based on extraction logic extract_product_data <- function(monthly_tracker_df) { - ParallelLogger::logDebug("Starting extract_product_data.") + logDebug("Starting extract_product_data.") start_df_msd <- NULL end_df_msd <- NULL @@ -103,7 +103,7 @@ extract_product_data <- function(monthly_tracker_df) { # Clean empty remaining first row product_data_df <- set_second_row_as_headers_and_remove_first_row(product_data_df) - ParallelLogger::logDebug("Finish extract_product_data.") + logDebug("Finish extract_product_data.") return(product_data_df) } @@ -134,7 +134,7 @@ extract_product_data <- function(monthly_tracker_df) { # column synonyms to unify column names # @columns_synonyms: Long format output of read_column_synonyms to match columns harmonize_input_data_columns <- function(product_df, columns_synonyms) { - ParallelLogger::logDebug("Start harmonize_input_data_columns.") + logDebug("Start harmonize_input_data_columns.") # In case that there is additional data in strange columns, keep only relevant columns # keep.cols <- names(product_df) %in% c("") @@ -147,14 +147,14 @@ harmonize_input_data_columns <- function(product_df, columns_synonyms) { ## report all column names which have not been found unknown_column_names <- colnames(product_df)[!colnames(product_df) %in% synonym_headers] if (length(unknown_column_names) > 0) { - ParallelLogger::logWarn("Unknown column names in sheet: ", paste(unknown_column_names, collapse = ", ")) 
+ logWarn("Unknown column names in sheet: ", paste(unknown_column_names, collapse = ", ")) } # replacing var codes colnames_found <- match(colnames(product_df), synonym_headers, nomatch = 0) colnames(product_df)[colnames(product_df) %in% synonym_headers] <- columns_synonyms$name_clean[colnames_found] - ParallelLogger::logDebug("Finish harmonize_input_data_columns.") + logDebug("Finish harmonize_input_data_columns.") if (sum(colnames_found == 0) != 0) { "Non-matching column names found (see 0)" # SK: remove non matching column names @@ -282,7 +282,7 @@ update_receivedfrom <- function(product_df) { grepl("Balance", product_units_received, ignore.case = TRUE) & !is.na(product_received_from) ~ product_received_from )) %>% dplyr::mutate(product_units_released = ifelse(!is.na(product_received_from), NA, product_units_released)) - ParallelLogger::logInfo("The rule for the case was applied successfully- Released (product_units_released) column also includes values for Start/End Balance") + logInfo("The rule for the case was applied successfully- Released (product_units_released) column also includes values for Start/End Balance") } return(product_df) } diff --git a/R/helper_read_patient_data.R b/R/helper_read_patient_data.R index 8f61b04..cca5779 100644 --- a/R/helper_read_patient_data.R +++ b/R/helper_read_patient_data.R @@ -1,7 +1,7 @@ # extracting country and clinic code from patient ID # expects that patient ID has a certain format extract_country_clinic_code <- function(patient_data) { - ParallelLogger::logDebug("Start extract_country_clinic_code.") + logDebug("Start extract_country_clinic_code.") patient_ids <- patient_data["id"] %>% dplyr::filter(id != "0") %>% tidyr::drop_na() %>% @@ -19,9 +19,9 @@ extract_country_clinic_code <- function(patient_data) { clinic_code <- names(sort(table(patient_ids$clinic), decreasing = T))[1] - ParallelLogger::logDebug("country_code = ", country_code, ".") - ParallelLogger::logDebug("clinic_code = ", clinic_code, ".") - ParallelLogger::logDebug("Finish extract_country_clinic_code.") + logDebug("country_code = ", country_code, ".") + logDebug("clinic_code = ", clinic_code, ".") + logDebug("Finish extract_country_clinic_code.") return(list("country_code" = country_code, "clinic_code" = clinic_code)) } @@ -40,9 +40,9 @@ extract_country_clinic_code <- function(patient_data) { #' @return data.frame with the patient data #' @export extract_patient_data <- function(tracker_data_file, sheet, year) { - ParallelLogger::logDebug("Start extract_patient_data for sheet = ", sheet, ".") + logDebug("Start extract_patient_data for sheet = ", sheet, ".") - ParallelLogger::logDebug("Start openxlsx::read.xlsx to get tracker_data.") + logDebug("Start openxlsx::read.xlsx to get tracker_data.") tracker_data <- openxlsx::read.xlsx( xlsxFile = tracker_data_file, sheet = sheet, @@ -59,7 +59,7 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { # col_names = F, # .name_repair = "unique_quiet" # ) - ParallelLogger::logDebug("Finish openxlsx::read.xlsx.") + logDebug("Finish openxlsx::read.xlsx.") # Assumption: first column is always empty until patient data begins patient_data_range <- which(!is.na(tracker_data[, 1])) @@ -80,8 +80,8 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { row_max <- row_max + 1 } - ParallelLogger::logInfo("Patient data found in rows ", row_min, " to ", row_max, ".") - ParallelLogger::logDebug("Start readxl::read_excel to get patient data.") + logInfo("Patient data found in rows ", row_min, " to ", row_max, ".") + 
logDebug("Start readxl::read_excel to get patient data.") df_patient <- readxl::read_excel( path = tracker_data_file, sheet = sheet, @@ -91,11 +91,11 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { col_types = c("text"), .name_repair = "unique_quiet" ) - ParallelLogger::logDebug("Finish readxl::read_excel.") + logDebug("Finish readxl::read_excel.") if (header_cols[2] == header_cols_2[2]) { # take into account that date info gets separated from the updated values (not in the same row, usually in the bottom row) - ParallelLogger::logInfo("Read in multiline header.") + logInfo("Read in multiline header.") diff_colnames <- which((header_cols != header_cols_2)) header_cols[diff_colnames] <- @@ -106,7 +106,7 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { } colnames(df_patient) <- header_cols - ParallelLogger::logDebug("Found patient column names = ", paste(header_cols, collapse = ","), ".") + logDebug("Found patient column names = ", paste(header_cols, collapse = ","), ".") # delete columns without a header (=NA) df_patient <- df_patient[, !is.na(colnames(df_patient))] @@ -117,7 +117,7 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { df_patient <- df_patient[rowSums(is.na(df_patient)) != ncol(df_patient), ] - ParallelLogger::logDebug("Finish extract_patient_data.") + logDebug("Finish extract_patient_data.") df_patient } @@ -137,7 +137,7 @@ extract_patient_data <- function(tracker_data_file, sheet, year) { #' @export harmonize_patient_data_columns <- function(patient_df, columns_synonyms) { - ParallelLogger::logDebug("Start harmonize_patient_data_columns.") + logDebug("Start harmonize_patient_data_columns.") patient_df <- patient_df[!is.na(names(patient_df))] @@ -151,11 +151,11 @@ harmonize_patient_data_columns <- mismatching_column_ids <- which(colnames_found == 0) if (length(mismatching_column_ids) > 0) { - ParallelLogger::logWarn( + logWarn( "Non-matching column names found: ", paste(colnames(patient_df)[mismatching_column_ids], collapse = ","), "." ) } - ParallelLogger::logDebug("Finish harmonize_patient_data_columns.") + logDebug("Finish harmonize_patient_data_columns.") patient_df } diff --git a/R/helper_scipt_1.R b/R/helper_script_1.R similarity index 76% rename from R/helper_scipt_1.R rename to R/helper_script_1.R index f1cf102..67d2901 100644 --- a/R/helper_scipt_1.R +++ b/R/helper_script_1.R @@ -1,19 +1,17 @@ #' @title Process a single tracker file and extract patient and product data. #' -#' @param tracker_file Filename of the tracler. +#' @param tracker_file File name of the tracker. #' @param paths a list with the paths to the tracker root dir, the patient and product output dir and the root output dir. -#' @param p progressor from progressr package. +#' @param synonyms a list with the synonyms for patient and product data header names. #' #' @export -process_tracker_file <- function(tracker_file, paths, p) { - p() +process_tracker_file <- function(tracker_file, paths, synonyms) { tracker_name <- tools::file_path_sans_ext(basename(tracker_file)) - synonyms <- get_synonyms() tracker_data_file <- file.path(paths$tracker_root, tracker_file) - ParallelLogger::logDebug("Start process_tracker_file.") - ParallelLogger::logInfo( + logDebug("Start process_tracker_file.") + logInfo( "Current file: ", tracker_name ) @@ -29,10 +27,10 @@ process_tracker_file <- function(tracker_file, paths, p) { synonyms_patient = synonyms$patient ), error = function(e) { - ParallelLogger::logError("Could not process patient data. 
Error = ", e$message, ".") + logError("Could not process patient data. Error = ", e$message, ".") }, warning = function(w) { - ParallelLogger::logWarn("Could not process patient data. Warning = ", w$message, ".") + logWarn("Could not process patient data. Warning = ", w$message, ".") } ) }, @@ -51,17 +49,17 @@ process_tracker_file <- function(tracker_file, paths, p) { synonyms_product = synonyms$product ), error = function(e) { - ParallelLogger::logError("Could not process product data. Error = ", e$message, ".") + logError("Could not process product data. Error = ", e$message, ".") }, warning = function(w) { - ParallelLogger::logWarn("Could not process product data. Warning = ", w$message, ".") + logWarn("Could not process product data. Warning = ", w$message, ".") } ) }, output_root = paths$output_root ) - ParallelLogger::logDebug("Finish process_tracker_file.") + logDebug("Finish process_tracker_file.") } @@ -78,7 +76,7 @@ process_patient_data <- tracker_data_file, output_root, synonyms_patient) { - ParallelLogger::logDebug("Start process_patient_data.") + logDebug("Start process_patient_data.") df_raw_patient <- reading_patient_data( @@ -88,7 +86,7 @@ process_patient_data <- df_raw_patient <- df_raw_patient %>% dplyr::mutate(file_name = tracker_name) - ParallelLogger::logDebug( + logDebug( "df_raw_patient dim: ", dim(df_raw_patient) %>% as.data.frame(), "." @@ -101,7 +99,7 @@ process_patient_data <- suffix = "_patient_raw" ) - ParallelLogger::logDebug("Finish process_patient_data.") + logDebug("Finish process_patient_data.") } @@ -118,7 +116,7 @@ process_product_data <- tracker_data_file, output_root, synonyms_product) { - ParallelLogger::logDebug("Start process_product_data.") + logDebug("Start process_product_data.") df_raw_product <- reading_product_data_step1( @@ -129,10 +127,10 @@ process_product_data <- if (!is.null(df_raw_product)) { df_raw_product <- df_raw_product %>% dplyr::mutate(file_name = tracker_name) } else { - ParallelLogger::logDebug("Empty product data") + logDebug("Empty product data") } - ParallelLogger::logDebug( + logDebug( "df_raw_product dim: ", dim(df_raw_product) %>% as.data.frame(), "." @@ -147,7 +145,7 @@ process_product_data <- suffix = "_product_raw" ) } else { - ParallelLogger::logWarn("No product data in the file") + logWarn("No product data in the file") } - ParallelLogger::logDebug("Finish process_product_data.") + logDebug("Finish process_product_data.") } diff --git a/R/helper_script_2.R b/R/helper_script_2.R index 035b5db..e1c444b 100644 --- a/R/helper_script_2.R +++ b/R/helper_script_2.R @@ -11,12 +11,10 @@ #' #' @param patient_file Path to patient raw parquet file from first script. #' @param paths List with paths to patient_data_cleaned and product_data_cleaned folders. -#' @param p progressor from progressr package. +#' @param allowed_provinces A named character vector with names of allowed provinces. 
#' #' @export -process_patient_file <- function(patient_file, paths, p) { - p() - +process_patient_file <- function(patient_file, paths, allowed_provinces) { ERROR_VAL_NUMERIC <<- 999999 ERROR_VAL_CHARACTER <<- "Undefined" ERROR_VAL_DATE <<- "9999-09-09" @@ -26,8 +24,8 @@ process_patient_file <- function(patient_file, paths, p) { file.path(paths$tracker_root, patient_file) output_root <- paths$patient_data_cleaned - ParallelLogger::logDebug("Start process_patient_file.") - ParallelLogger::logInfo( + logDebug("Start process_patient_file.") + logInfo( "Current file: ", patient_file_name ) @@ -40,26 +38,25 @@ process_patient_file <- function(patient_file, paths, p) { process_patient_file_worker( patient_file_path = patient_file_path, patient_file_name = patient_file_name, - output_root = output_root + output_root = output_root, + allowed_provinces = allowed_provinces ), error = function(e) { - ParallelLogger::logError("Could not process raw patient data. Error = ", e$message, ".") + logError("Could not process raw patient data. Error = ", e$message, ".") }, warning = function(w) { - ParallelLogger::logWarn("Could not process raw patient data. Warning = ", w$message, ".") + logWarn("Could not process raw patient data. Warning = ", w$message, ".") } ) }, output_root = paths$output_root ) - ParallelLogger::logInfo("Finish process_patient_file.") + logInfo("Finish process_patient_file.") } -process_patient_file_worker <- function(patient_file_path, patient_file_name, output_root) { - allowed_provinces <- get_allowed_provinces() - +process_patient_file_worker <- function(patient_file_path, patient_file_name, output_root, allowed_provinces) { df_patient_raw <- arrow::read_parquet(patient_file_path) # filter all rows with no patient id or patient name @@ -71,24 +68,24 @@ process_patient_file_worker <- function(patient_file_path, patient_file_name, ou # data before 2019 had only one column for updated hba1c and fbg # with date as part of the value if (!"hba1c_updated_date" %in% colnames(df_patient_raw) && "hba1c_updated" %in% colnames(df_patient_raw)) { - ParallelLogger::logInfo("Column updated_hba1c_date not found. Trying to parse from hba1c_updated.") + logInfo("Column updated_hba1c_date not found. Trying to parse from hba1c_updated.") df_patient_raw <- extract_date_from_measurement(df_patient_raw, "hba1c_updated") - ParallelLogger::logDebug("Finished parsing dates from hba1c_updated.") + logDebug("Finished parsing dates from hba1c_updated.") } if (!"fbg_updated_date" %in% colnames(df_patient_raw) && "fbg_updated_mg" %in% colnames(df_patient_raw)) { - ParallelLogger::logInfo("Column updated_fbg_date not found. Trying to parse from fbg_updated_mg.") + logInfo("Column updated_fbg_date not found. Trying to parse from fbg_updated_mg.") df_patient_raw <- extract_date_from_measurement(df_patient_raw, "fbg_updated_mg") - ParallelLogger::logDebug("Finished parsing dates from fbg_updated_mg.") + logDebug("Finished parsing dates from fbg_updated_mg.") } if (!"fbg_updated_date" %in% colnames(df_patient_raw) && "fbg_updated_mmol" %in% colnames(df_patient_raw)) { - ParallelLogger::logInfo("Column fbg_updated_date not found. Trying to parse from fbg_updated_mmol.") + logInfo("Column fbg_updated_date not found. 
Trying to parse from fbg_updated_mmol.") df_patient_raw <- extract_date_from_measurement(df_patient_raw, "fbg_updated_mmol") - ParallelLogger::logDebug("Finished parsing dates from fbg_updated_mmol.") + logDebug("Finished parsing dates from fbg_updated_mmol.") } # blood pressure is given as sys/dias value pair, @@ -101,7 +98,7 @@ process_patient_file_worker <- function(patient_file_path, patient_file_name, ou # depending on the equipment being used. # If the reading is above the maximum available value the > sign is used - # we would prefer to retain this character in the database as it is important for data analysis. - ParallelLogger::logInfo("Adding columns hba1c_baseline_exceeds and hba1c_updated_exceeds.") + logInfo("Adding columns hba1c_baseline_exceeds and hba1c_updated_exceeds.") df_patient_raw <- df_patient_raw %>% dplyr::mutate( hba1c_baseline_exceeds = ifelse(grepl(">|<", hba1c_baseline), TRUE, FALSE), @@ -111,7 +108,7 @@ process_patient_file_worker <- function(patient_file_path, patient_file_name, ou # --- META SCHEMA --- # meta schema has all final columns for the database # along with their corresponding data types - ParallelLogger::logInfo("Creating meta schema.") + logInfo("Creating meta schema.") # short type string for read_csv: # iiinDccDcnnDnncnlnlDncDccDDDccccDccccciDciiiDn schema <- tibble::tibble( @@ -181,15 +178,15 @@ process_patient_file_worker <- function(patient_file_path, patient_file_name, ou ) cols_extra <- colnames(df_patient_raw)[!colnames(df_patient_raw) %in% colnames(schema)] - ParallelLogger::logWarn("Extra columns in patient data: ", paste(cols_extra, collapse = ", ")) + logWarn("Extra columns in patient data: ", paste(cols_extra, collapse = ", ")) cols_missing <- colnames(schema)[!colnames(schema) %in% colnames(df_patient_raw)] - ParallelLogger::logWarn("Missing columns in patient data: ", paste(cols_missing, collapse = ", ")) + logWarn("Missing columns in patient data: ", paste(cols_missing, collapse = ", ")) # add all columns of schema to df_patient_raw # keep all rows, only append missing cols - ParallelLogger::logInfo("Merging df_patient with meta schema and selecting all columns of meta schema.") + logInfo("Merging df_patient with meta schema and selecting all columns of meta schema.") df_patient <- merge.default(df_patient_raw, schema, all.x = T) df_patient <- df_patient[colnames(schema)] @@ -290,7 +287,7 @@ process_patient_file_worker <- function(patient_file_path, patient_file_name, ou df_patient <- df_patient %>% dplyr::arrange(tracker_date, id) - ParallelLogger::logDebug( + logDebug( "df_patient dim: ", dim(df_patient) %>% as.data.frame(), "." @@ -313,19 +310,16 @@ process_patient_file_worker <- function(patient_file_path, patient_file_name, ou #' #' @param product_file Path to product raw parquet file from first script. #' @param paths List with paths to patient_data_cleaned and product_data_cleaned folders. -#' @param p progressor from progressr package. +#' @param synonyms_product Synonyms for product data header names. 
#' #' @export -process_product_file <- function(product_file, paths, p) { - p() - synonyms <- get_synonyms() - synonyms_product <- synonyms$product +process_product_file <- function(product_file, paths, synonyms_product) { product_file_name <- tools::file_path_sans_ext(basename(product_file)) product_file_path <- file.path(paths$tracker_root, product_file) - ParallelLogger::logDebug("Start process_product_file.") - ParallelLogger::logInfo( + logDebug("Start process_product_file.") + logInfo( "Current file: ", product_file_name ) @@ -337,14 +331,14 @@ process_product_file <- function(product_file, paths, p) { tryCatch( df_product_raw <- reading_product_data_step2(df_product_raw, synonyms_product), error = function(e) { - ParallelLogger::logError("Could not process raw product data. Error = ", e$message, ".") + logError("Could not process raw product data. Error = ", e$message, ".") }, warning = function(w) { - ParallelLogger::logWarn("Could not process raw product data. Warning = ", w$message, ".") + logWarn("Could not process raw product data. Warning = ", w$message, ".") } ) - ParallelLogger::logDebug( + logDebug( "df_product_raw dim: ", dim(df_product_raw) %>% as.data.frame(), "." @@ -360,5 +354,5 @@ process_product_file <- function(product_file, paths, p) { output_root = paths$output_root ) - ParallelLogger::logInfo("Finish process_product_file.") + logInfo("Finish process_product_file.") } diff --git a/R/link_product_patient.R b/R/link_product_patient.R index 8816d57..2a29378 100644 --- a/R/link_product_patient.R +++ b/R/link_product_patient.R @@ -8,13 +8,14 @@ #' @param patient_file A string specifying the path to the patient file. #' #' @return This function does not return a value. It prints log messages about the linking process. +#' @export #' #' @examples #' \dontrun{ #' link_product_patient("path/to/product_data.parquet", "path/to/patient_data.parquet") #' } link_product_patient <- function(product_file, patient_file) { - ParallelLogger::logInfo("Trying to link product file ", product_file, " with patient file ", patient_file) + logInfo("Trying to link product file ", product_file, " with patient file ", patient_file) patient_data <- arrow::read_parquet(patient_file) product_data <- arrow::read_parquet(product_file) @@ -44,7 +45,7 @@ link_product_patient <- function(product_file, patient_file) { tryCatch( { if (nrow(summary_df) > 0) { - ParallelLogger::logWarn( + logWarn( "The number of mismatched patient IDs between the product and patient data is ", nrow(summary_df), ". ", paste("File Name: ", summary_df$file_name, @@ -56,18 +57,18 @@ link_product_patient <- function(product_file, patient_file) { } }, error = function(e) { - ParallelLogger::logError("Could not link csv files for product and patient data. Error: ", e$message) + logError("Could not link csv files for product and patient data. Error: ", e$message) }, warning = function(w) { - ParallelLogger::logWarn("Could not link csv files for product and patient data. Warning: ", w$message) + logWarn("Could not link csv files for product and patient data. 
Warning: ", w$message) } ) } else { - ParallelLogger::logInfo( + logInfo( "There are no mismatched patient IDs between the product data - ", product_file, " and patient data - ", patient_file ) } - ParallelLogger::logInfo("Finished attempting to link product csv file with patient csv file.") + logInfo("Finished attempting to link product csv file with patient csv file.") } diff --git a/R/read_cleaned_patient_data.R b/R/read_cleaned_patient_data.R index b217da4..ae56e34 100644 --- a/R/read_cleaned_patient_data.R +++ b/R/read_cleaned_patient_data.R @@ -7,7 +7,7 @@ #' @export read_cleaned_patient_data <- function(input_root, patient_data_files) { - ParallelLogger::logInfo("Start read_cleaned_patient_data") + logInfo("Start read_cleaned_patient_data") patient_data <- patient_data_files %>% purrr::map(function(patient_file) { @@ -17,6 +17,6 @@ read_cleaned_patient_data <- - ParallelLogger::logInfo("Finish read_cleaned_patient_data") + logInfo("Finish read_cleaned_patient_data") patient_data } diff --git a/R/read_patient_data.R b/R/read_patient_data.R index 61f6839..b292f1c 100644 --- a/R/read_patient_data.R +++ b/R/read_patient_data.R @@ -1,9 +1,9 @@ reading_patient_data <- function(tracker_data_file, columns_synonyms) { - ParallelLogger::logDebug("Start reading_patient_data.") + logDebug("Start reading_patient_data.") sheet_list <- readxl::excel_sheets(tracker_data_file) testit::assert(length(sheet_list) > 0) - ParallelLogger::logInfo( + logInfo( "Found ", length(sheet_list), " sheets inside the current file = ", @@ -13,7 +13,7 @@ reading_patient_data <- month_list <- sheet_list[na.omit(pmatch(month.abb, sheet_list))] - ParallelLogger::logInfo( + logInfo( "Found ", length(month_list), " month sheets inside the current file = ", @@ -24,18 +24,18 @@ reading_patient_data <- # Extract year year <- get_tracker_year(tracker_data_file, month_list) - ParallelLogger::logInfo("Tracker year = ", year, ".") + logInfo("Tracker year = ", year, ".") testit::assert(year %in% c(2017, 2018, 2019, 2020, 2021, 2022)) tidy_tracker_list <- NULL - ParallelLogger::logDebug("Start processing sheets.") + logDebug("Start processing sheets.") for (curr_sheet in month_list) { - ParallelLogger::logDebug("Start processing sheet ", curr_sheet, ".") + logDebug("Start processing sheet ", curr_sheet, ".") df_patient <- extract_patient_data(tracker_data_file, curr_sheet, year) testit::assert(nrow(df_patient) > 0) - ParallelLogger::logDebug("df_patient dim: ", dim(df_patient) %>% as.data.frame(), ".") + logDebug("df_patient dim: ", dim(df_patient) %>% as.data.frame(), ".") df_patient <- harmonize_patient_data_columns(df_patient, columns_synonyms) @@ -66,15 +66,15 @@ reading_patient_data <- ) tidy_tracker_list[[curr_sheet]] <- df_patient - ParallelLogger::logDebug("Finish processing sheet ", curr_sheet, ".") + logDebug("Finish processing sheet ", curr_sheet, ".") } - ParallelLogger::logDebug("Start combining sheet data into single data frame.") + logDebug("Start combining sheet data into single data frame.") df_raw <- dplyr::bind_rows(tidy_tracker_list) - ParallelLogger::logDebug("Finish combining sheet data into single data frame.") + logDebug("Finish combining sheet data into single data frame.") if ("Patient List" %in% sheet_list) { - ParallelLogger::logDebug("Start extracting patient list.") + logDebug("Start extracting patient list.") patient_list <- extract_patient_data( tracker_data_file, "Patient List", @@ -98,9 +98,9 @@ reading_patient_data <- by = "id", relationship = "many-to-one" ) - ParallelLogger::logDebug("Finish 
extracting patient list.") + logDebug("Finish extracting patient list.") } - ParallelLogger::logInfo("Finish reading_patient_data.") + logInfo("Finish reading_patient_data.") return(df_raw) } diff --git a/R/read_product_data.R b/R/read_product_data.R index d1fc051..10d8886 100644 --- a/R/read_product_data.R +++ b/R/read_product_data.R @@ -4,7 +4,7 @@ # function based on parts from run_a4d_product_data.R and helper functions reading_product_data_step1 <- function(tracker_data_file, columns_synonyms) { - ParallelLogger::logDebug("Start reading_product_data_step1.") + logDebug("Start reading_product_data_step1.") # rename column names to match colnames(columns_synonyms) <- c("name_clean", "name_to_be_matched") @@ -13,13 +13,13 @@ reading_product_data_step1 <- month_list <- sheet_list[na.omit(pmatch(month.abb, sheet_list))] year <- get_tracker_year(tracker_data_file, month_list) - ParallelLogger::logDebug("Found ", length(sheet_list), " sheets: ", paste(sheet_list, collapse = ", "), ".") - ParallelLogger::logDebug("Found ", length(month_list), " month sheets: ", paste(month_list, collapse = ", "), ".") + logDebug("Found ", length(sheet_list), " sheets: ", paste(sheet_list, collapse = ", "), ".") + logDebug("Found ", length(month_list), " month sheets: ", paste(month_list, collapse = ", "), ".") # loop through all months for (curr_sheet in month_list) { - ParallelLogger::logDebug("Start processing the following sheet: ", curr_sheet) + logDebug("Start processing the following sheet: ", curr_sheet) # open tracker data tracker_data <- data.frame( @@ -37,7 +37,7 @@ reading_product_data_step1 <- grepl("Description of Support", tracker_data[, ]))) ) { # go to next month - ParallelLogger::logInfo("Could not find product data in tracker data. Skipping ", curr_sheet, ".") + logInfo("Could not find product data in tracker data. Skipping ", curr_sheet, ".") next } @@ -46,7 +46,7 @@ reading_product_data_step1 <- # If after extraction, dataframe is empty, this iteration is also skipped. if (all(is.na(product_df))) { - ParallelLogger::logInfo("Product data is empty. Skipping ", curr_sheet, ".") + logInfo("Product data is empty. Skipping ", curr_sheet, ".") next } @@ -69,11 +69,11 @@ reading_product_data_step1 <- tryCatch( { if (num_na_rows > 0) { - ParallelLogger::logInfo(curr_sheet, " the number of rows where the patient's name is missing: ", col_released, " is not NA and ", col_released_to, " (patient's name) is NA = ", num_na_rows) + logInfo(curr_sheet, " the number of rows where the patient's name is missing: ", col_released, " is not NA and ", col_released_to, " (patient's name) is NA = ", num_na_rows) } }, error = function(e) { - ParallelLogger::logError(curr_sheet, " trying with num_na_rows for products. Error: ", e$message) + logError(curr_sheet, " trying with num_na_rows for products. Error: ", e$message) } ) @@ -83,7 +83,7 @@ reading_product_data_step1 <- tryCatch( { if (nrow(non_processed_dates) > 0) { - ParallelLogger::logWarn( + logWarn( curr_sheet, " the number of rows with non-processed dates in product_entry_date is ", nrow(non_processed_dates), ": ", @@ -92,7 +92,7 @@ reading_product_data_step1 <- } }, error = function(e) { - ParallelLogger::logError(curr_sheet, " trying with non_processed_dates in product_entry_date. Error: ", e$message) + logError(curr_sheet, " trying with non_processed_dates in product_entry_date. 
Error: ", e$message) } ) @@ -107,7 +107,7 @@ reading_product_data_step1 <- # Check if the entry dates for the balance match the month/year on the sheet check_entry_dates(product_df, curr_sheet) - ParallelLogger::logDebug("Finish processing sheet: ", curr_sheet) + logDebug("Finish processing sheet: ", curr_sheet) # combine all months if (!exists("df_final")) { @@ -121,7 +121,7 @@ reading_product_data_step1 <- } else { return(NULL) } - ParallelLogger::logDebug("Finish reading_product_data_step1.") + logDebug("Finish reading_product_data_step1.") } @@ -151,7 +151,7 @@ count_na_rows <- function(df, units_released_col, released_to_col) { #' #' @return This function does not return a value. It logs a warning message if there are any dates in 'product_entry_date' that don't match the month/year on the sheet. check_entry_dates <- function(df, Sheet) { - ParallelLogger::logDebug("Start check_entry_dates.") + logDebug("Start check_entry_dates.") # Check if the entry dates for the balance match the month/year on the sheet entry_dates_df <- df %>% dplyr::filter(grepl("^[0-9]+$", product_entry_date)) @@ -167,14 +167,14 @@ check_entry_dates <- function(df, Sheet) { not_same <- entry_dates_df[entry_dates_df$ed_month != entry_dates_df$product_table_month | entry_dates_df$ed_year != entry_dates_df$product_table_year, ] if (nrow(not_same) > 0) { - ParallelLogger::logWarn( + logWarn( Sheet, " the number of dates in product_entry_date that don't match the month/year on the sheet is ", nrow(not_same), ": ", paste(not_same$ed_date, collapse = ", ") ) } - ParallelLogger::logDebug("Finish check_entry_dates.") + logDebug("Finish check_entry_dates.") } #' @title Remove Rows with NA Values in Specified Columns. @@ -192,7 +192,7 @@ remove_rows_with_na_columns <- na_rows <- apply(df[column_names], 1, function(x) all(is.na(x))) # log message - ParallelLogger::logInfo(paste(length(na_rows[na_rows == T]), " rows deleted out of ", nrow(df), " rows (reason: rows not containing additional info).", sep = "")) + logInfo(paste(length(na_rows[na_rows == T]), " rows deleted out of ", nrow(df), " rows (reason: rows not containing additional info).", sep = "")) # Return the data frame without the NA rows return(df[!na_rows, ]) @@ -215,7 +215,7 @@ check_negative_balance <- function(df, Sheet) { # Check if there are any rows in the new data frame if (nrow(negative_df) > 0) { # Log a warning message with the number of negative values and their corresponding product_balance values - ParallelLogger::logWarn( + logWarn( Sheet, " number of negative values in product_balance on the sheet is ", nrow(negative_df), ": ", @@ -242,7 +242,7 @@ switch_columns_stock <- "product_units_received" = "product_received_from", "product_received_from" = "product_units_received" ) - ParallelLogger::logDebug("Columns product_units_received and product_received_from were switched") + logDebug("Columns product_units_received and product_received_from were switched") return(df) } else { return(df) @@ -290,14 +290,14 @@ report_unknown_products <- function(df, Sheet, stock_list_df) { # Check if there are any unknown products names if (length(unmatched_products) > 0) { # Log a warning message with the number of unknown products names - ParallelLogger::logWarn( + logWarn( Sheet, " the number of unknown product names on the sheet is ", length(unmatched_products), ": ", paste(unmatched_products, collapse = ", ") ) } else { - ParallelLogger::logInfo(Sheet, " no unknown product names on the sheet") + logInfo(Sheet, " no unknown product names on the sheet") } } @@ 
-317,16 +317,16 @@ report_unknown_products <- function(df, Sheet, stock_list_df) { #' product_list <- load_product_reference_data("your_file.xlsx") #' } load_product_reference_data <- function(stock_summary_xlsx = "reference_data/master_tracker_variables.xlsx") { - ParallelLogger::logDebug("Trying to load the product list from the Stock Summary, ", stock_summary_xlsx, "...") + logDebug("Trying to load the product list from the Stock Summary, ", stock_summary_xlsx, "...") tryCatch( { product_names_df <- readxl::read_excel(stock_summary_xlsx, "Stock_Summary") colnames(product_names_df) <- tolower(colnames(product_names_df)) - ParallelLogger::logDebug(nrow(product_names_df), " product names were loaded from the Stock Summary.") + logDebug(nrow(product_names_df), " product names were loaded from the Stock Summary.") return(product_names_df) }, error = function(e) { - ParallelLogger::logError("Error in loading stock product list: ", e) + logError("Error in loading stock product list: ", e) } ) } @@ -371,7 +371,7 @@ add_product_categories <- function(inventory_data, product_category_mapping) { #' df <- extract_unit_capacity(df, "product") #' } extract_unit_capacity <- function(df, column_name) { - ParallelLogger::logDebug("Trying to extract Unit Capacity from ", column_name, " column") + logDebug("Trying to extract Unit Capacity from ", column_name, " column") # Extract all symbols between parentheses df$product_unit_capacity <- stringr::str_extract(df[[column_name]], "\\(([^)]+)\\)") @@ -393,7 +393,7 @@ extract_unit_capacity <- function(df, column_name) { # Add 1 to NA values df$product_unit_capacity[is.na(df$product_unit_capacity)] <- 1 - ParallelLogger::logDebug("Finished extracting Unit Capacity from ", column_name, " column") + logDebug("Finished extracting Unit Capacity from ", column_name, " column") return(df) } @@ -410,7 +410,7 @@ extract_unit_capacity <- function(df, column_name) { #' @return Cleaned product data for one specified tracker. 
reading_product_data_step2 <- function(df, columns_synonyms) { - ParallelLogger::logDebug("Start reading_product_data_step2.") + logDebug("Start reading_product_data_step2.") # rename column names to match colnames(columns_synonyms) <- c("name_clean", "name_to_be_matched") @@ -425,7 +425,7 @@ reading_product_data_step2 <- # loop through all months for (sheet_month in unique(df$product_sheet_name)) { - ParallelLogger::logDebug(paste("Start processing the following sheet:", sheet_month)) + logDebug(paste("Start processing the following sheet:", sheet_month)) # filter on month sheet product_df <- df %>% @@ -453,7 +453,7 @@ reading_product_data_step2 <- product_df <- remove_rows_with_na_columns(product_df, column_names_check) # jump to next sheet if dataframe empty from here if (nrow(product_df) == 0) { - ParallelLogger::logDebug(paste(sheet_month, " sheet is empty after filtering and skipped", sep = "")) + logDebug(paste(sheet_month, " sheet is empty after filtering and skipped", sep = "")) next } @@ -527,12 +527,12 @@ reading_product_data_step2 <- df_final <- df_final %>% rbind(product_df) - ParallelLogger::logDebug(paste("Finished processing the following sheet:", sheet_month)) + logDebug(paste("Finished processing the following sheet:", sheet_month)) } if (nrow(df_final) > 0) { return(df_final) } else { - ParallelLogger::logDebug(paste("No product data extracted for the following tracker:", df$file_name[1])) + logDebug(paste("No product data extracted for the following tracker:", df$file_name[1])) } } diff --git a/man/process_patient_data.Rd b/man/process_patient_data.Rd index e1334b2..f92d060 100644 --- a/man/process_patient_data.Rd +++ b/man/process_patient_data.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helper_scipt_1.R +% Please edit documentation in R/helper_script_1.R \name{process_patient_data} \alias{process_patient_data} \title{Extract patient data.} diff --git a/man/process_patient_file.Rd b/man/process_patient_file.Rd index 69c9a62..21e3f60 100644 --- a/man/process_patient_file.Rd +++ b/man/process_patient_file.Rd @@ -4,14 +4,14 @@ \alias{process_patient_file} \title{Process and clean patient raw data} \usage{ -process_patient_file(patient_file, paths, p) +process_patient_file(patient_file, paths, allowed_provinces) } \arguments{ \item{patient_file}{Path to patient raw parquet file from first script.} \item{paths}{List with paths to patient_data_cleaned and product_data_cleaned folders.} -\item{p}{progressor from progressr package.} +\item{allowed_provinces}{A named character vector with names of allowed provinces.} } \description{ Read in the output of the first script as parquet (all character). 
diff --git a/man/process_product_data.Rd b/man/process_product_data.Rd index 0109c18..e6a18c5 100644 --- a/man/process_product_data.Rd +++ b/man/process_product_data.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helper_scipt_1.R +% Please edit documentation in R/helper_script_1.R \name{process_product_data} \alias{process_product_data} \title{Extract product data.} diff --git a/man/process_product_file.Rd b/man/process_product_file.Rd index 07d01d9..a88ee9e 100644 --- a/man/process_product_file.Rd +++ b/man/process_product_file.Rd @@ -4,14 +4,14 @@ \alias{process_product_file} \title{Process and clean product raw data} \usage{ -process_product_file(product_file, paths, p) +process_product_file(product_file, paths, synonyms_product) } \arguments{ \item{product_file}{Path to product raw parquet file from first script.} \item{paths}{List with paths to patient_data_cleaned and product_data_cleaned folders.} -\item{p}{progressor from progressr package.} +\item{synonyms_product}{Synonyms for product data header names.} } \description{ A short description... diff --git a/man/process_tracker_file.Rd b/man/process_tracker_file.Rd index 2453d08..67c566b 100644 --- a/man/process_tracker_file.Rd +++ b/man/process_tracker_file.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helper_scipt_1.R +% Please edit documentation in R/helper_script_1.R \name{process_tracker_file} \alias{process_tracker_file} \title{Process a single tracker file and extract patient and product data.} \usage{ -process_tracker_file(tracker_file, paths, p) +process_tracker_file(tracker_file, paths, synonyms) } \arguments{ -\item{tracker_file}{Filename of the tracler.} +\item{tracker_file}{File name of the tracker.} \item{paths}{a list with the paths to the tracker root dir, the patient and product output dir and the root output dir.} -\item{p}{progressor from progressr package.} +\item{synonyms}{a list with the synonyms for patient and product data header names.} } \description{ Process a single tracker file and extract patient and product data. 
diff --git a/renv.lock b/renv.lock index 5cb4660..f684844 100644 --- a/renv.lock +++ b/renv.lock @@ -177,21 +177,24 @@ }, "brew": { "Package": "brew", - "Version": "1.0-8", + "Version": "1.0-10", "Source": "Repository", "Repository": "CRAN", - "Hash": "d69a786e85775b126bddbee185ae6084" + "Hash": "8f4a384e19dccd8c65356dc096847b76" }, "brio": { "Package": "brio", - "Version": "1.1.3", + "Version": "1.1.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "976cf154dfb043c012d87cddd8bca363" + "Requirements": [ + "R" + ], + "Hash": "68bd2b066e1fe780bbf62fc8bcc36de3" }, "bslib": { "Package": "bslib", - "Version": "0.5.1", + "Version": "0.6.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -202,12 +205,13 @@ "htmltools", "jquerylib", "jsonlite", + "lifecycle", "memoise", "mime", "rlang", "sass" ], - "Hash": "283015ddfbb9d7bf15ea9f0b5698f0d9" + "Hash": "c0d8599494bc7fb408cd206bbdd9cab0" }, "cachem": { "Package": "cachem", @@ -321,13 +325,13 @@ }, "curl": { "Package": "curl", - "Version": "5.1.0", + "Version": "5.2.0", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R" ], - "Hash": "9123f3ef96a2c1a93927d828b2fe7d4c" + "Hash": "ce88d13c0b10fe88a37d9c59dba2d7f9" }, "data.table": { "Package": "data.table", @@ -342,17 +346,16 @@ }, "desc": { "Package": "desc", - "Version": "1.4.2", + "Version": "1.4.3", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R", "R6", "cli", - "rprojroot", "utils" ], - "Hash": "6b9602c7ebbe87101a9c8edb6e8b6d21" + "Hash": "99b79fcbd6c4d1ce087f5c5c758b384f" }, "devtools": { "Package": "devtools", @@ -565,7 +568,7 @@ }, "gert": { "Package": "gert", - "Version": "2.0.0", + "Version": "2.0.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -576,7 +579,7 @@ "sys", "zip" ], - "Hash": "bbbd21a253d473f4671d7dcbd6d8971f" + "Hash": "f70d3fe2d9e7654213a946963d1591eb" }, "gh": { "Package": "gh", @@ -680,7 +683,7 @@ }, "htmlwidgets": { "Package": "htmlwidgets", - "Version": "1.6.2", + "Version": "1.6.4", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -691,11 +694,11 @@ "rmarkdown", "yaml" ], - "Hash": "a865aa85bcb2697f47505bfd70422471" + "Hash": "04291cc45198225444a397606810ac37" }, "httpuv": { "Package": "httpuv", - "Version": "1.6.12", + "Version": "1.6.13", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -706,7 +709,7 @@ "promises", "utils" ], - "Hash": "c992f75861325961c29a188b45e549f7" + "Hash": "d23d2879001f3d82ee9dc38a9ef53c4c" }, "httr": { "Package": "httr", @@ -725,7 +728,7 @@ }, "httr2": { "Package": "httr2", - "Version": "0.2.3", + "Version": "1.0.0", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -734,13 +737,15 @@ "cli", "curl", "glue", + "lifecycle", "magrittr", "openssl", "rappdirs", "rlang", + "vctrs", "withr" ], - "Hash": "193bb297368afbbb42dc85784a46b36e" + "Hash": "e2b30f1fc039a0bab047dd52bb20ef71" }, "ini": { "Package": "ini", @@ -787,14 +792,14 @@ }, "later": { "Package": "later", - "Version": "1.3.1", + "Version": "1.3.2", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "Rcpp", "rlang" ], - "Hash": "40401c9cf2bc2259dfe83311c9384710" + "Hash": "a3e051d405326b8b0012377434c62b37" }, "lattice": { "Package": "lattice", @@ -948,7 +953,7 @@ }, "pkgbuild": { "Package": "pkgbuild", - "Version": "1.4.2", + "Version": "1.4.3", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -956,13 +961,10 @@ "R6", "callr", "cli", - "crayon", "desc", - "prettyunits", - "processx", - "rprojroot" + "processx" ], - "Hash": 
"beb25b32a957a22a5c301a9e441190b3" + "Hash": "c0143443203205e6a2760ce553dafc24" }, "pkgconfig": { "Package": "pkgconfig", @@ -1044,7 +1046,7 @@ }, "processx": { "Package": "processx", - "Version": "3.8.2", + "Version": "3.8.3", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1053,7 +1055,7 @@ "ps", "utils" ], - "Hash": "3efbd8ac1be0296a46c55387aeace0f3" + "Hash": "82d48b1aec56084d9438dbf98087a7e9" }, "profvis": { "Package": "profvis", @@ -1140,14 +1142,14 @@ }, "ragg": { "Package": "ragg", - "Version": "1.2.6", + "Version": "1.2.7", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "systemfonts", "textshaping" ], - "Hash": "6ba2fa8740abdc2cc148407836509901" + "Hash": "90a1b8b7e518d7f90480d56453b4d062" }, "rappdirs": { "Package": "rappdirs", @@ -1352,7 +1354,7 @@ }, "sass": { "Package": "sass", - "Version": "0.4.7", + "Version": "0.4.8", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1362,7 +1364,7 @@ "rappdirs", "rlang" ], - "Hash": "6bd4d33b50ff927191ec9acbf52fd056" + "Hash": "168f9353c76d4c4b0a0bbf72e2c2d035" }, "sessioninfo": { "Package": "sessioninfo", @@ -1379,7 +1381,7 @@ }, "shiny": { "Package": "shiny", - "Version": "1.7.5.1", + "Version": "1.8.0", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1409,7 +1411,7 @@ "withr", "xtable" ], - "Hash": "5ec01cc255f2138fc2f0dc74d2b1a1a1" + "Hash": "3a1f41807d648a908e3c7f0334bf85e6" }, "snow": { "Package": "snow", @@ -1508,7 +1510,7 @@ }, "testthat": { "Package": "testthat", - "Version": "3.2.0", + "Version": "3.2.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1519,7 +1521,6 @@ "cli", "desc", "digest", - "ellipsis", "evaluate", "jsonlite", "lifecycle", @@ -1534,7 +1535,7 @@ "waldo", "withr" ], - "Hash": "877508719fcb8c9525eccdadf07a5102" + "Hash": "4767a686ebe986e6cb01d075b3f09729" }, "textshaping": { "Package": "textshaping", @@ -1567,6 +1568,17 @@ ], "Hash": "a84e2cc86d07289b3b6f5069df7a004c" }, + "tictoc": { + "Package": "tictoc", + "Version": "1.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "methods" + ], + "Hash": "2c6c63f3f4f103d41cb2aae1ab1c7ce1" + }, "tidyr": { "Package": "tidyr", "Version": "1.3.0", @@ -1619,13 +1631,13 @@ }, "tinytex": { "Package": "tinytex", - "Version": "0.48", + "Version": "0.49", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "xfun" ], - "Hash": "8f96d229b7311beb32b94cf413b13f84" + "Hash": "5ac22900ae0f386e54f1c307eca7d843" }, "tzdb": { "Package": "tzdb", diff --git a/scripts/pre_commit.R b/scripts/pre_commit.R index b1df661..e5f2ba7 100644 --- a/scripts/pre_commit.R +++ b/scripts/pre_commit.R @@ -11,3 +11,5 @@ do.call(styler::style_dir, c(list(path = here::here("tests")), styler_confs)) source(here::here("reference_data", "build_package_data.R")) devtools::document() devtools::test() +devtools::uninstall() +devtools::install() diff --git a/scripts/run_script_1_extract_raw_data.R b/scripts/run_script_1_extract_raw_data.R index febdb0f..e536bec 100644 --- a/scripts/run_script_1_extract_raw_data.R +++ b/scripts/run_script_1_extract_raw_data.R @@ -1,12 +1,17 @@ +library(furrr) +library(progressr) +library(tictoc) + options(readxl.show_progress = FALSE) -future::plan("multisession") +plan("multisession") paths <- a4d::init_paths(c("patient_data_raw", "product_data_raw"), delete = TRUE) a4d::setup_logger(paths$output_root, "script1") tracker_files <- a4d::get_files(paths$tracker_root) +synonyms <- a4d::get_synonyms() -ParallelLogger::logInfo( +logInfo( "Found ", 
length(tracker_files), " xlsx files under ", @@ -14,21 +19,26 @@ ParallelLogger::logInfo( "." ) -ParallelLogger::logInfo("Start processing tracker files.") +logInfo("Start processing tracker files.") -progressr::with_progress({ - p <- progressr::progressor(steps = length(tracker_files)) +tic() +with_progress({ + p <- progressor(steps = length(tracker_files), label = "Tracker files") - result <- furrr::future_map( + future_map( tracker_files, - a4d::process_tracker_file, - paths = paths, - p = p + function(tracker_file) { + a4d::process_tracker_file(tracker_file, paths, synonyms) + p() + return() + }, + .options = furrr_options(seed = NULL, packages = c("a4d", "ParallelLogger"), scheduling = FALSE) ) }) +toc() -ParallelLogger::logInfo("Finish processing all tracker files.") +logInfo("Finish processing all tracker files.") -ParallelLogger::clearLoggers() +clearLoggers() -future::plan("sequential") +plan("sequential") diff --git a/scripts/run_script_2_clean_data.R b/scripts/run_script_2_clean_data.R index 92ab7d2..96ca452 100644 --- a/scripts/run_script_2_clean_data.R +++ b/scripts/run_script_2_clean_data.R @@ -1,19 +1,25 @@ -options(readxl.show_progress = FALSE) -future::plan("multisession") +library(furrr) +library(progressr) +library(tictoc) + +plan("multisession") paths <- a4d::init_paths(c("patient_data_cleaned", "product_data_cleaned"), delete = TRUE) a4d::setup_logger(paths$output_root, "script2") patient_data_files <- a4d::get_files(paths$tracker_root, pattern = "patient_raw.parquet$") product_data_files <- a4d::get_files(paths$tracker_root, pattern = "product_raw.parquet$") -ParallelLogger::logInfo( +allowed_provinces <- a4d::get_allowed_provinces() +synonyms <- a4d::get_synonyms() + +logInfo( "Found ", length(patient_data_files), " patient csv files under ", paths$tracker_root, "." ) -ParallelLogger::logInfo( +logInfo( "Found ", length(product_data_files), " product csv files under ", @@ -21,36 +27,46 @@ ParallelLogger::logInfo( "." 
)

-ParallelLogger::logInfo("Start processing patient csv files.")
+logInfo("Start processing patient csv files.")

-progressr::with_progress({
-    p <- progressr::progressor(steps = length(patient_data_files))
+tic()
+with_progress({
+    p <- progressor(steps = length(patient_data_files))

-    result <- furrr::future_map(
+    result <- future_map(
        patient_data_files,
-        a4d::process_patient_file,
-        paths = paths,
-        p = p
+        function(patient_file) {
+            a4d::process_patient_file(patient_file, paths, allowed_provinces)
+            p()
+            return()
+        },
+        .options = furrr_options(seed = NULL, packages = c("a4d", "ParallelLogger"), scheduling = FALSE)
    )
 })
+toc()

-ParallelLogger::logInfo("Finish processing all patient csv files.")
+logInfo("Finish processing all patient csv files.")

-ParallelLogger::logDebug("Start processing product csv files.")
+logDebug("Start processing product csv files.")

-progressr::with_progress({
-    p <- progressr::progressor(steps = length(product_data_files))
+tic()
+with_progress({
+    p <- progressor(steps = length(product_data_files))

-    result <- furrr::future_map(
+    result <- future_map(
        product_data_files,
-        a4d::process_product_file,
-        paths = paths,
-        p = p
+        function(product_file) {
+            a4d::process_product_file(product_file, paths, synonyms_product = synonyms$product)
+            p()
+            return()
+        },
+        .options = furrr_options(seed = NULL, packages = c("a4d", "ParallelLogger"), scheduling = FALSE)
    )
 })
+toc()

-ParallelLogger::logInfo("Finish processing all csv files.")
+logInfo("Finish processing all csv files.")

-ParallelLogger::clearLoggers()
+clearLoggers()

-future::plan("sequential")
+plan("sequential")
diff --git a/scripts/run_script_3_create_tables.R b/scripts/run_script_3_create_tables.R
index 4cd6216..4f6a6ad 100644
--- a/scripts/run_script_3_create_tables.R
+++ b/scripts/run_script_3_create_tables.R
@@ -4,133 +4,132 @@ ERROR_VAL_NUMERIC <<- 999999
 ERROR_VAL_CHARACTER <<- "Undefined"
 ERROR_VAL_DATE <<- "9999-09-09"

-main <- function() {
-    paths <- init_paths(c("tables"), delete = TRUE)
-    setup_logger(paths$output_root, "script3")
-    patient_data_files <- get_files(file.path(paths$output_root, "patient_data_cleaned"), pattern = "\\.parquet$")
-    product_data_files <- get_files(file.path(paths$output_root, "product_data_cleaned"), pattern = "\\.parquet$")
-    ParallelLogger::logInfo(
-        "Found ",
-        length(patient_data_files),
-        " patient csv files under ",
-        paths$tracker_root,
-        "."
-    )
-    ParallelLogger::logInfo(
-        "Found ",
-        length(product_data_files),
-        " product csv files under ",
-        paths$tracker_root,
-        "."
-    )
-    ParallelLogger::logInfo("Start creating table csv files.")
+paths <- a4d::init_paths(c("tables"), delete = TRUE)
+a4d::setup_logger(paths$output_root, "script3")
+patient_data_files <- a4d::get_files(file.path(paths$output_root, "patient_data_cleaned"), pattern = "\\.parquet$")
+product_data_files <- a4d::get_files(file.path(paths$output_root, "product_data_cleaned"), pattern = "\\.parquet$")

-    logfile <- "table_patient_data_static"
-    with_file_logger(logfile,
-        {
-            tryCatch(
-                {
-                    create_table_patient_data_static(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables)
-                },
-                error = function(e) {
-                    ParallelLogger::logError("Could not create table csv for static patient data. Error: ", e$message)
-                },
-                warning = function(w) {
-                    ParallelLogger::logWarn("Could not create table csv for static patient data. 
Error: ", w$message) - } - ) - }, - output_root = paths$output_root - ) +logInfo( + "Found ", + length(patient_data_files), + " patient csv files under ", + paths$tracker_root, + "." +) +logInfo( + "Found ", + length(product_data_files), + " product csv files under ", + paths$tracker_root, + "." +) - logfile <- "table_patient_data_monthly" - with_file_logger(logfile, - { - tryCatch( - { - create_table_patient_data_monthly(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables) - }, - error = function(e) { - ParallelLogger::logError("Could not create table csv for monthly patient data. Error: ", e$message) - }, - warning = function(w) { - ParallelLogger::logWarn("Could not create table csv for monthly patient data. Error: ", w$message) - } - ) - }, - output_root = paths$output_root - ) +logInfo("Start creating table csv files.") - logfile <- "table_longitudinal_data_hba1c" - with_file_logger(logfile, - { - tryCatch( - { - create_table_longitudinal_data( - patient_data_files, - file.path(paths$output_root, "patient_data_cleaned"), - paths$tables, - "hba1c_updated", - "hba1c" - ) - }, - error = function(e) { - ParallelLogger::logError("Could not create table csv for longitudinal patient data. Error: ", e$message) - }, - warning = function(w) { - ParallelLogger::logWarn("Could not create table csv for longitudinal patient data. Error: ", w$message) - } - ) - }, - output_root = paths$output_root - ) +logfile <- "table_patient_data_static" +with_file_logger(logfile, + { + tryCatch( + { + a4d::create_table_patient_data_static(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables) + }, + error = function(e) { + logError("Could not create table csv for static patient data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create table csv for static patient data. Error: ", w$message) + } + ) + }, + output_root = paths$output_root +) - logfile <- "table_product_data" - with_file_logger(logfile, - { - tryCatch( - { - create_table_product_data(file.path(paths$output_root, "product_data_cleaned"), paths$tables) - }, - error = function(e) { - ParallelLogger::logError("Could not create table for product data. Error: ", e$message) - }, - warning = function(w) { - ParallelLogger::logWarn("Could not create table for product data. Warning: ", w$message) - } - ) - }, - output_root = paths$output_root - ) +logfile <- "table_patient_data_monthly" +with_file_logger(logfile, + { + tryCatch( + { + a4d::create_table_patient_data_monthly(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables) + }, + error = function(e) { + logError("Could not create table csv for monthly patient data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create table csv for monthly patient data. Error: ", w$message) + } + ) + }, + output_root = paths$output_root +) - ParallelLogger::logInfo("Finish creating table files.") +logfile <- "table_longitudinal_data_hba1c" +with_file_logger(logfile, + { + tryCatch( + { + a4d::create_table_longitudinal_data( + patient_data_files, + file.path(paths$output_root, "patient_data_cleaned"), + paths$tables, + "hba1c_updated", + "hba1c" + ) + }, + error = function(e) { + logError("Could not create table csv for longitudinal patient data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create table csv for longitudinal patient data. 
Error: ", w$message) + } + ) + }, + output_root = paths$output_root +) - ParallelLogger::logInfo("Trying to link files for product and patient data.") +logfile <- "table_product_data" +with_file_logger(logfile, + { + tryCatch( + { + a4d::create_table_product_data(file.path(paths$output_root, "product_data_cleaned"), paths$tables) + }, + error = function(e) { + logError("Could not create table for product data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create table for product data. Warning: ", w$message) + } + ) + }, + output_root = paths$output_root +) - logfile <- "link_product_patient_data" +logInfo("Finish creating table files.") - with_file_logger(logfile, - { - tryCatch( - { - link_product_patient( - file.path(paths$tables, "product_data.parquet"), - file.path(paths$tables, "patient_data_monthly.parquet") - ) - }, - error = function(e) { - ParallelLogger::logError("Could not link files for product and patient data. Error: ", e$message) - }, - warning = function(w) { - ParallelLogger::logWarn("Could not link files for product and patient data. Warning: ", w$message) - } - ) - }, - output_root = paths$output_root - ) +logInfo("Trying to link files for product and patient data.") - ParallelLogger::logInfo("Finished linking files for product and patient data.") -} +logfile <- "link_product_patient_data" + +with_file_logger(logfile, + { + tryCatch( + { + a4d::link_product_patient( + file.path(paths$tables, "product_data.parquet"), + file.path(paths$tables, "patient_data_monthly.parquet") + ) + }, + error = function(e) { + logError("Could not link files for product and patient data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not link files for product and patient data. Warning: ", w$message) + } + ) + }, + output_root = paths$output_root +) + +logInfo("Finished linking files for product and patient data.") -main() From bc15cd71ae345786e18105695cb0057a3be8807b Mon Sep 17 00:00:00 2001 From: Luke Boelling Date: Wed, 3 Jan 2024 11:23:32 +0100 Subject: [PATCH 5/8] Update Namespace Run Styler on helper Main and script 3 Export function to convert csv to parquet for static clinic data --- NAMESPACE | 6 + R/helper_main.R | 201 +++++++++++++------------- R/sysdata.rda | Bin 2763 -> 0 bytes scripts/run_script_3_create_tables.R | 207 +++++++++++++-------------- 4 files changed, 210 insertions(+), 204 deletions(-) delete mode 100644 R/sysdata.rda diff --git a/NAMESPACE b/NAMESPACE index 298a603..c69c3f1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,7 +3,12 @@ export(calculate_most_frequent) export(convert_to) export(correct_decimal_sign) +export(create_table_longitudinal_data) +export(create_table_patient_data_monthly) +export(create_table_patient_data_static) +export(create_table_product_data) export(cut_numeric_value) +export(export_data_as_parquet) export(extract_patient_data) export(extract_unit_capacity) export(extract_year_from_age) @@ -21,6 +26,7 @@ export(get_tracker_year) export(harmonize_patient_data_columns) export(id_2_county_hospisal) export(init_paths) +export(link_product_patient) export(process_patient_data) export(process_patient_file) export(process_product_data) diff --git a/R/helper_main.R b/R/helper_main.R index 97bd8c0..1bb1bc6 100644 --- a/R/helper_main.R +++ b/R/helper_main.R @@ -14,36 +14,36 @@ #' @return A list with tracker_root_path and output_root path #' @export init_paths <- function(names, output_dir_name = "output", delete = FALSE) { - paths <- list() - tracker_root_path <- select_A4D_directory() - 
paths$tracker_root <- tracker_root_path - - output_root <- file.path( - tracker_root_path, - output_dir_name + paths <- list() + tracker_root_path <- select_A4D_directory() + paths$tracker_root <- tracker_root_path + + output_root <- file.path( + tracker_root_path, + output_dir_name + ) + + paths$output_root <- output_root + + for (name in names) { + subdir <- file.path( + tracker_root_path, + output_dir_name, + name ) - paths$output_root <- output_root - - for (name in names) { - subdir <- file.path( - tracker_root_path, - output_dir_name, - name - ) - - if (fs::dir_exists(subdir)) { - if (delete) { - fs::dir_delete(subdir) - } - } + if (fs::dir_exists(subdir)) { + if (delete) { + fs::dir_delete(subdir) + } + } - fs::dir_create(subdir) + fs::dir_create(subdir) - paths[[name]] <- subdir - } + paths[[name]] <- subdir + } - paths + paths } @@ -60,9 +60,9 @@ init_paths <- function(names, output_dir_name = "output", delete = FALSE) { #' @return A vector with file names. #' @export get_files <- function(tracker_root, pattern = "\\.xlsx$") { - tracker_files <- list.files(path = tracker_root, recursive = T, pattern = pattern) - tracker_files <- - tracker_files[stringr::str_detect(tracker_files, "~", negate = T)] + tracker_files <- list.files(path = tracker_root, recursive = T, pattern = pattern) + tracker_files <- + tracker_files[stringr::str_detect(tracker_files, "~", negate = T)] } @@ -74,14 +74,14 @@ get_files <- function(tracker_root, pattern = "\\.xlsx$") { #' @return A list with both patient and product data synonyms as tibble. #' @export get_synonyms <- function() { - ## Extract synonyms for products and patients - ## If you encounter new columns, just add the synonyms to these YAML files - synonyms_patient <- - read_column_synonyms(synonym_file = "synonyms_patient.yaml") - synonyms_product <- - read_column_synonyms(synonym_file = "synonyms_product.yaml") - - list(patient = synonyms_patient, product = synonyms_product) + ## Extract synonyms for products and patients + ## If you encounter new columns, just add the synonyms to these YAML files + synonyms_patient <- + read_column_synonyms(synonym_file = "synonyms_patient.yaml") + synonyms_product <- + read_column_synonyms(synonym_file = "synonyms_product.yaml") + + list(patient = synonyms_patient, product = synonyms_product) } @@ -104,21 +104,21 @@ get_synonyms <- function() { #' read_column_synonyms(synonym_file = "synonyms_product.yaml") #' } read_column_synonyms <- function(synonym_file, path_prefixes = c("reference_data", "synonyms")) { - path <- do.call(file.path, as.list(c(path_prefixes, synonym_file))) - columns_synonyms <- - yaml::read_yaml(path) %>% - unlist() %>% - as.data.frame() %>% - tibble::rownames_to_column() %>% - # remove digits that were created when converting to data frame - dplyr::mutate( - rowname = stringr::str_replace(rowname, pattern = "[:digit:]$", "") - ) %>% - dplyr::rename( - "variable_name" = "rowname", - "tracker_name" = "." - ) %>% - dplyr::as_tibble() + path <- do.call(file.path, as.list(c(path_prefixes, synonym_file))) + columns_synonyms <- + yaml::read_yaml(path) %>% + unlist() %>% + as.data.frame() %>% + tibble::rownames_to_column() %>% + # remove digits that were created when converting to data frame + dplyr::mutate( + rowname = stringr::str_replace(rowname, pattern = "[:digit:]$", "") + ) %>% + dplyr::rename( + "variable_name" = "rowname", + "tracker_name" = "." 
+ ) %>% + dplyr::as_tibble() } @@ -132,31 +132,31 @@ read_column_synonyms <- function(synonym_file, path_prefixes = c("reference_data #' @examples #' \dontrun{ #' export_data( -#' data = df_raw_product, -#' filename = tracker_name, -#' output_root = output_root, -#' suffix = "_product_data" +#' data = df_raw_product, +#' filename = tracker_name, +#' output_root = output_root, +#' suffix = "_product_data" #' ) #' } export_data <- function(data, filename, output_root, suffix) { - logDebug("Start export_data. Suffix = ", suffix, ".") - data %>% - write.csv( - file = - file.path( - output_root, - paste0( - filename, - suffix, - ".csv" - ) - ), - row.names = F, - na = "", - fileEncoding = "UTF-16LE", - quote = T - ) - logInfo("Finish export_data. Suffix = ", suffix, ".") + logDebug("Start export_data. Suffix = ", suffix, ".") + data %>% + write.csv( + file = + file.path( + output_root, + paste0( + filename, + suffix, + ".csv" + ) + ), + row.names = F, + na = "", + fileEncoding = "UTF-16LE", + quote = T + ) + logInfo("Finish export_data. Suffix = ", suffix, ".") } @@ -170,19 +170,20 @@ export_data <- function(data, filename, output_root, suffix) { #' @examples #' \dontrun{ #' export_data( -#' data = df_raw_product, -#' filename = tracker_name, -#' output_root = output_root, -#' suffix = "_product_data" +#' data = df_raw_product, +#' filename = tracker_name, +#' output_root = output_root, +#' suffix = "_product_data" #' ) #' } +#' @export export_data_as_parquet <- function(data, filename, output_root, suffix) { - logDebug("Start export_data. Suffix = ", suffix, ".") - data %>% - arrow::write_parquet( - sink = file.path(output_root, paste0(filename, suffix, ".parquet")), - ) - logInfo("Finish export_data. Suffix = ", suffix, ".") + logDebug("Start export_data. Suffix = ", suffix, ".") + data %>% + arrow::write_parquet( + sink = file.path(output_root, paste0(filename, suffix, ".parquet")), + ) + logInfo("Finish export_data. Suffix = ", suffix, ".") } @@ -193,20 +194,20 @@ export_data_as_parquet <- function(data, filename, output_root, suffix) { #' #' @return tibble with patient data read_raw_csv <- function(file) { - logDebug("Start reading data with read_csv.") - df_patient_raw <- readr::read_csv( - file, - name_repair = "check_unique", - progress = FALSE, - show_col_types = FALSE, - col_types = readr::cols(.default = "c"), - locale = readr::locale(encoding = "UTF-16LE") - ) - logDebug("Finished loading data with read_csv.") - logInfo("Dim: ", dim(df_patient_raw)) - logInfo("Columns: ", spec(df_patient_raw)) - - df_patient_raw + logDebug("Start reading data with read_csv.") + df_patient_raw <- readr::read_csv( + file, + name_repair = "check_unique", + progress = FALSE, + show_col_types = FALSE, + col_types = readr::cols(.default = "c"), + locale = readr::locale(encoding = "UTF-16LE") + ) + logDebug("Finished loading data with read_csv.") + logInfo("Dim: ", dim(df_patient_raw)) + logInfo("Columns: ", spec(df_patient_raw)) + + df_patient_raw } @@ -218,7 +219,7 @@ read_raw_csv <- function(file) { #' @return A named character vector with all allowed provinces. 
#' @export get_allowed_provinces <- function() { - ## Should new countries and provinces be added, update the YAML file - provinces <- yaml::read_yaml("reference_data/provinces/allowed_provinces.yaml") |> unlist() - return(provinces) + ## Should new countries and provinces be added, update the YAML file + provinces <- yaml::read_yaml("reference_data/provinces/allowed_provinces.yaml") |> unlist() + return(provinces) } diff --git a/R/sysdata.rda b/R/sysdata.rda deleted file mode 100644 index be6b002cff04d1329d0fdca86a74da91bb6a0978..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2763 zcmZvZXFL=P0Edl`z3FVx*|U(n#~G27vz=8qWIH($_nM)*iwntnMqPF~cV=c}WM@a> zIHNM+$avT9{nr1(^WpjS{QjaD7peXo?RNffh+r>ouRjQFl8)RWGUm^TxX>E)oEpAv zyotPpI}vxbV*CWU^u+rcdPT8Lzw}2gHbo?zhCWO-wr4|3`n{N>F0`MK+jEmvoc#3B^rJA*WE@xA6)^MT}yS@3*CGz z%tbIiafjBtO%qjB%^X77m_y|%6lU?Qlqs+AaHsX7!R>Il^2Q{}o+97_g5Nt-rZAJN zA@*_7=0#RYsnzf^X06S{{mD2IB0)lQ&(NZ`qs7?o7N6Gx>X3dqM)2VYXeG*+)cES> zso09*ktqDayTX*unlJZ8XPG^|_BCOC6gC$wA4M5DaZ=ZGNalHs4KP(S&Jino~!&?RB@TR zLJbIZi|Mj}zGt^bewx>xh<6A)r3lWkv6;@bHn7_+NWFatnA67v2R8qrnJn*;a@hmQ z#82^^)Yn1RTjo6^c+&@hgx-{Ke6y@C+V_m(^E@**e>A=c^CV*jA9D|t;y*XaEX%p|C|wei!)R&|&8jD^&D z;cGo@Ln}?FZkA-It5uii8U_i{q$C4LD)g0%g0pOAg z!#!>2YGPwXiO9303bL+>oYihqOssUOZ7#QMncQE2Y1>@HQrr0LiH|yIoqiiGP0fTb zV1_Ar7n@eSQ&aZKu4kJBNWZm39;|w2`?L30%Bg?BNxLe|_$XT;43X=_kFj=1KN&9g zM>Q?_Rd(3ZO2O_q!j8+v7yatjf!hA{4<+09w=M!%QOY$loI2p(*ozi9n$MKi43X?# zy;4%8SG41IuPH<#lbaueiI1yW+c@Zu6N`#sx0A~w364B#f4H8GmQlkxd7(I$(##{O z4J#G7Y82DpRi8cU?cO>a89yG>_k%-PtZ! z%t?SB&ye-qcQbsB<#)eo@%Z|5)@CC@~X6GcEb7^|MwN_`A16VbW!4cQ; z_i-?I@%hiO?{Kj-K3XAf9+MHYwy-&;@$S1{b5!B_0qLoM(+q8NzRB+Q zh0iC6QVY$S43D7MF#@$Gw{LjF{^Fs>2Zx+}I}nu=xUM`RaFS4pFHb;uPc7>C5<=ox z**UK(9VY$SpOHs^$3he%3(5ExQ*a_2Q3BqV5u!4St#Bk#`AhfRCH);z$3W(MPEbN^ zYZF^x9U})lUUSdli>?#tmn)>}=SkFJM!qWWGBxpFcBHSq)GV~VZ1(1p`oo$b@_`ewd>k|F8`>kTKTZKbIUESe0tk!#0QHXB3y#3U%+xsalZt# zCM21+F7>}Qs`zezM(UXsVE`{IuU;vzzVBfZV$clVb5R+!25GjPU8HYWS_=-SRuc!- zp-!Do$;_NTkUXeflc+7qQX*!gD1MY~b-M;Rza&WWL84u2d=0g)6R0x^rVoTG;*Uoq zLyP-tJw%8~-42J1T?v<$G}xCuQU8dOd8nPU^=?o^43WhSipj5Z-54ZIiI{?3F3Z^3 zKW4D}F~r;UVmVRq2b^v>kaN1(uICT5^;_WTq{!3W4aEt74_YhDs>Nc(sWuPoU64j1 zdW=ur2$Kf#`;nJtt<0I!+uOy$rr=x7mB&=_(OEUZM^ilWIBc*Ye%lkX`uR%I2We^7cSV zelGfyMHQWA~CP{kzx+K;;_ zq8W>wfsX1SVyDVWnANQVKC34rfx=SCMV`kM9KXqV1Wf4bW)7_rM(7>Ub(_oQ5&m@G zYLRQX(~9(V49Z%}YvYCFiQ|v$NF{`OJF_n9LKAF)_)qZwW-UGzc?`I=)~>^dmsuN3 z!I|)T&ZXPm;c>gkVyeR1V~d=-|t`rY2fku z4M^#ot~W7qOyQ=GYUx2@{I!4!TQQq4q)D~a;S8#gQOu@Ll|tsOJF667%c z{>CiFtATn`!qq#P-ekZ)AK35x8`V2kyl-F4HE`0^jhptL%=h)#>{HJPx;$qTMK^QX z=j+d&V4e%Z>U{m+kvdP$j_yYY1QXwD@N@?aa8d=A1&XwO?QJiN+Q2yUmTLW{Sgiv* z?P*a0n2GjMdZ3yzE)n#v;zs3TCb~<3Ks6Oy8py^vz{~z$)mh31#4FyeBp@ovuF6V!aK#K$UwgQSy<`8rp`*a{~@29JxOw^nyeJgdr~DaXV8!a)$#Zz@4J`_uw-fGz0v;{ z=RBLNR13HpCF_PL_U>qZlt$+}_FwNKmevb%chGOJ>*-$7={V_JtV3A6es+BHVp?AW zI7@q;6}vON*R?0rrlBQOP676YQPu44W-t^CvSP)#U!w`|hSnusUZx35d2hy32SPv(|RZ4%|P= J`6@|8^%oJ1MwkEq diff --git a/scripts/run_script_3_create_tables.R b/scripts/run_script_3_create_tables.R index 4ffebbc..995c0bd 100644 --- a/scripts/run_script_3_create_tables.R +++ b/scripts/run_script_3_create_tables.R @@ -11,117 +11,117 @@ patient_data_files <- a4d::get_files(file.path(paths$output_root, "patient_data_ product_data_files <- a4d::get_files(file.path(paths$output_root, "product_data_cleaned"), pattern = "\\.parquet$") logInfo( - "Found ", - length(patient_data_files), - " patient csv files under ", - paths$tracker_root, - "." 
+ "Found ", + length(patient_data_files), + " patient csv files under ", + paths$tracker_root, + "." ) logInfo( - "Found ", - length(product_data_files), - " product csv files under ", - paths$tracker_root, - "." + "Found ", + length(product_data_files), + " product csv files under ", + paths$tracker_root, + "." ) logInfo("Start creating table csv files.") logfile <- "table_patient_data_static" with_file_logger(logfile, - { - tryCatch( - { - a4d::create_table_patient_data_static(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables) - }, - error = function(e) { - logError("Could not create table csv for static patient data. Error: ", e$message) - }, - warning = function(w) { - logWarn("Could not create table csv for static patient data. Error: ", w$message) - } - ) - }, - output_root = paths$output_root + { + tryCatch( + { + a4d::create_table_patient_data_static(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables) + }, + error = function(e) { + logError("Could not create table csv for static patient data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create table csv for static patient data. Error: ", w$message) + } + ) + }, + output_root = paths$output_root ) logfile <- "table_patient_data_monthly" with_file_logger(logfile, - { - tryCatch( - { - a4d::create_table_patient_data_monthly(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables) - }, - error = function(e) { - logError("Could not create table csv for monthly patient data. Error: ", e$message) - }, - warning = function(w) { - logWarn("Could not create table csv for monthly patient data. Error: ", w$message) - } - ) - }, - output_root = paths$output_root + { + tryCatch( + { + a4d::create_table_patient_data_monthly(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables) + }, + error = function(e) { + logError("Could not create table csv for monthly patient data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create table csv for monthly patient data. Error: ", w$message) + } + ) + }, + output_root = paths$output_root ) logfile <- "table_longitudinal_data_hba1c" with_file_logger(logfile, - { - tryCatch( - { - a4d::create_table_longitudinal_data( - patient_data_files, - file.path(paths$output_root, "patient_data_cleaned"), - paths$tables, - "hba1c_updated", - "hba1c" - ) - }, - error = function(e) { - logError("Could not create table csv for longitudinal patient data. Error: ", e$message) - }, - warning = function(w) { - logWarn("Could not create table csv for longitudinal patient data. Error: ", w$message) - } + { + tryCatch( + { + a4d::create_table_longitudinal_data( + patient_data_files, + file.path(paths$output_root, "patient_data_cleaned"), + paths$tables, + "hba1c_updated", + "hba1c" ) - }, - output_root = paths$output_root + }, + error = function(e) { + logError("Could not create table csv for longitudinal patient data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create table csv for longitudinal patient data. Error: ", w$message) + } + ) + }, + output_root = paths$output_root ) - logfile <- "clinic_data_static" - with_file_logger(logfile, - { - tryCatch( - { - a4d::export_data_as_parquet(data = read.csv("reference_data/clinic_data_static.csv"),filename = "clinic_data_static",output_root = paths$tables,suffix="") - }, - error = function(e) { - logError("Could not create clinic data static table. 
Error: ", e$message) - }, - warning = function(w) { - logWarn("Could not create clinic data static table. Warning: ", w$message) - } - ) - }, - output_root = paths$output_root +logfile <- "clinic_data_static" +with_file_logger(logfile, + { + tryCatch( + { + a4d::export_data_as_parquet(data = read.csv("reference_data/clinic_data_static.csv"), filename = "clinic_data_static", output_root = paths$tables, suffix = "") + }, + error = function(e) { + logError("Could not create clinic data static table. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create clinic data static table. Warning: ", w$message) + } ) + }, + output_root = paths$output_root +) logfile <- "table_product_data" with_file_logger(logfile, - { - tryCatch( - { - a4d::create_table_product_data(file.path(paths$output_root, "product_data_cleaned"), paths$tables) - }, - error = function(e) { - logError("Could not create table for product data. Error: ", e$message) - }, - warning = function(w) { - logWarn("Could not create table for product data. Warning: ", w$message) - } - ) - }, - output_root = paths$output_root + { + tryCatch( + { + a4d::create_table_product_data(file.path(paths$output_root, "product_data_cleaned"), paths$tables) + }, + error = function(e) { + logError("Could not create table for product data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create table for product data. Warning: ", w$message) + } + ) + }, + output_root = paths$output_root ) logInfo("Finish creating table files.") @@ -131,24 +131,23 @@ logInfo("Trying to link files for product and patient data.") logfile <- "link_product_patient_data" with_file_logger(logfile, - { - tryCatch( - { - a4d::link_product_patient( - file.path(paths$tables, "product_data.parquet"), - file.path(paths$tables, "patient_data_monthly.parquet") - ) - }, - error = function(e) { - logError("Could not link files for product and patient data. Error: ", e$message) - }, - warning = function(w) { - logWarn("Could not link files for product and patient data. Warning: ", w$message) - } + { + tryCatch( + { + a4d::link_product_patient( + file.path(paths$tables, "product_data.parquet"), + file.path(paths$tables, "patient_data_monthly.parquet") ) - }, - output_root = paths$output_root + }, + error = function(e) { + logError("Could not link files for product and patient data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not link files for product and patient data. 
Warning: ", w$message) + } + ) + }, + output_root = paths$output_root ) logInfo("Finished linking files for product and patient data.") - From 5bf846bd2c0932ce6ef9d4b1ee0a83388ead352f Mon Sep 17 00:00:00 2001 From: Luke Boelling Date: Wed, 3 Jan 2024 11:27:25 +0100 Subject: [PATCH 6/8] restyle with 4 indent --- R/helper_main.R | 200 +++++++++++++------------- scripts/run_pipeline.R | 1 - scripts/run_script_3_create_tables.R | 202 +++++++++++++-------------- 3 files changed, 201 insertions(+), 202 deletions(-) diff --git a/R/helper_main.R b/R/helper_main.R index 1bb1bc6..01c5810 100644 --- a/R/helper_main.R +++ b/R/helper_main.R @@ -14,36 +14,36 @@ #' @return A list with tracker_root_path and output_root path #' @export init_paths <- function(names, output_dir_name = "output", delete = FALSE) { - paths <- list() - tracker_root_path <- select_A4D_directory() - paths$tracker_root <- tracker_root_path - - output_root <- file.path( - tracker_root_path, - output_dir_name - ) - - paths$output_root <- output_root - - for (name in names) { - subdir <- file.path( - tracker_root_path, - output_dir_name, - name + paths <- list() + tracker_root_path <- select_A4D_directory() + paths$tracker_root <- tracker_root_path + + output_root <- file.path( + tracker_root_path, + output_dir_name ) - if (fs::dir_exists(subdir)) { - if (delete) { - fs::dir_delete(subdir) - } - } + paths$output_root <- output_root + + for (name in names) { + subdir <- file.path( + tracker_root_path, + output_dir_name, + name + ) - fs::dir_create(subdir) + if (fs::dir_exists(subdir)) { + if (delete) { + fs::dir_delete(subdir) + } + } - paths[[name]] <- subdir - } + fs::dir_create(subdir) + + paths[[name]] <- subdir + } - paths + paths } @@ -60,9 +60,9 @@ init_paths <- function(names, output_dir_name = "output", delete = FALSE) { #' @return A vector with file names. #' @export get_files <- function(tracker_root, pattern = "\\.xlsx$") { - tracker_files <- list.files(path = tracker_root, recursive = T, pattern = pattern) - tracker_files <- - tracker_files[stringr::str_detect(tracker_files, "~", negate = T)] + tracker_files <- list.files(path = tracker_root, recursive = T, pattern = pattern) + tracker_files <- + tracker_files[stringr::str_detect(tracker_files, "~", negate = T)] } @@ -74,14 +74,14 @@ get_files <- function(tracker_root, pattern = "\\.xlsx$") { #' @return A list with both patient and product data synonyms as tibble. 
 #' @export
 get_synonyms <- function() {
-  ## Extract synonyms for products and patients
-  ## If you encounter new columns, just add the synonyms to these YAML files
-  synonyms_patient <-
-    read_column_synonyms(synonym_file = "synonyms_patient.yaml")
-  synonyms_product <-
-    read_column_synonyms(synonym_file = "synonyms_product.yaml")
-
-  list(patient = synonyms_patient, product = synonyms_product)
+    ## Extract synonyms for products and patients
+    ## If you encounter new columns, just add the synonyms to these YAML files
+    synonyms_patient <-
+        read_column_synonyms(synonym_file = "synonyms_patient.yaml")
+    synonyms_product <-
+        read_column_synonyms(synonym_file = "synonyms_product.yaml")
+
+    list(patient = synonyms_patient, product = synonyms_product)
 }
 
 
@@ -104,21 +104,21 @@ get_synonyms <- function() {
 #' read_column_synonyms(synonym_file = "synonyms_product.yaml")
 #' }
 read_column_synonyms <- function(synonym_file, path_prefixes = c("reference_data", "synonyms")) {
-  path <- do.call(file.path, as.list(c(path_prefixes, synonym_file)))
-  columns_synonyms <-
-    yaml::read_yaml(path) %>%
-    unlist() %>%
-    as.data.frame() %>%
-    tibble::rownames_to_column() %>%
-    # remove digits that were created when converting to data frame
-    dplyr::mutate(
-      rowname = stringr::str_replace(rowname, pattern = "[:digit:]$", "")
-    ) %>%
-    dplyr::rename(
-      "variable_name" = "rowname",
-      "tracker_name" = "."
-    ) %>%
-    dplyr::as_tibble()
+    path <- do.call(file.path, as.list(c(path_prefixes, synonym_file)))
+    columns_synonyms <-
+        yaml::read_yaml(path) %>%
+        unlist() %>%
+        as.data.frame() %>%
+        tibble::rownames_to_column() %>%
+        # remove digits that were created when converting to data frame
+        dplyr::mutate(
+            rowname = stringr::str_replace(rowname, pattern = "[:digit:]$", "")
+        ) %>%
+        dplyr::rename(
+            "variable_name" = "rowname",
+            "tracker_name" = "."
+        ) %>%
+        dplyr::as_tibble()
 }
 
 
@@ -132,31 +132,31 @@ read_column_synonyms <- function(synonym_file, path_prefixes = c("reference_data
 #' @examples
 #' \dontrun{
 #' export_data(
-#'   data = df_raw_product,
-#'   filename = tracker_name,
-#'   output_root = output_root,
-#'   suffix = "_product_data"
+#'     data = df_raw_product,
+#'     filename = tracker_name,
+#'     output_root = output_root,
+#'     suffix = "_product_data"
 #' )
 #' }
 export_data <- function(data, filename, output_root, suffix) {
-  logDebug("Start export_data. Suffix = ", suffix, ".")
-  data %>%
-    write.csv(
-      file =
-        file.path(
-          output_root,
-          paste0(
-            filename,
-            suffix,
-            ".csv"
-          )
-        ),
-      row.names = F,
-      na = "",
-      fileEncoding = "UTF-16LE",
-      quote = T
-    )
-  logInfo("Finish export_data. Suffix = ", suffix, ".")
+    logDebug("Start export_data. Suffix = ", suffix, ".")
+    data %>%
+        write.csv(
+            file =
+                file.path(
+                    output_root,
+                    paste0(
+                        filename,
+                        suffix,
+                        ".csv"
+                    )
+                ),
+            row.names = F,
+            na = "",
+            fileEncoding = "UTF-16LE",
+            quote = T
+        )
+    logInfo("Finish export_data. Suffix = ", suffix, ".")
 }
 
 
@@ -170,20 +170,20 @@ export_data <- function(data, filename, output_root, suffix) {
 #' @examples
 #' \dontrun{
 #' export_data(
-#'   data = df_raw_product,
-#'   filename = tracker_name,
-#'   output_root = output_root,
-#'   suffix = "_product_data"
+#'     data = df_raw_product,
+#'     filename = tracker_name,
+#'     output_root = output_root,
+#'     suffix = "_product_data"
 #' )
 #' }
 #' @export
 export_data_as_parquet <- function(data, filename, output_root, suffix) {
-  logDebug("Start export_data. Suffix = ", suffix, ".")
-  data %>%
-    arrow::write_parquet(
-      sink = file.path(output_root, paste0(filename, suffix, ".parquet")),
-    )
-  logInfo("Finish export_data. Suffix = ", suffix, ".")
+    logDebug("Start export_data. Suffix = ", suffix, ".")
+    data %>%
+        arrow::write_parquet(
+            sink = file.path(output_root, paste0(filename, suffix, ".parquet")),
+        )
+    logInfo("Finish export_data. Suffix = ", suffix, ".")
 }
 
 
@@ -194,20 +194,20 @@ export_data_as_parquet <- function(data, filename, output_root, suffix) {
 #'
 #' @return tibble with patient data
 read_raw_csv <- function(file) {
-  logDebug("Start reading data with read_csv.")
-  df_patient_raw <- readr::read_csv(
-    file,
-    name_repair = "check_unique",
-    progress = FALSE,
-    show_col_types = FALSE,
-    col_types = readr::cols(.default = "c"),
-    locale = readr::locale(encoding = "UTF-16LE")
-  )
-  logDebug("Finished loading data with read_csv.")
-  logInfo("Dim: ", dim(df_patient_raw))
-  logInfo("Columns: ", spec(df_patient_raw))
-
-  df_patient_raw
+    logDebug("Start reading data with read_csv.")
+    df_patient_raw <- readr::read_csv(
+        file,
+        name_repair = "check_unique",
+        progress = FALSE,
+        show_col_types = FALSE,
+        col_types = readr::cols(.default = "c"),
+        locale = readr::locale(encoding = "UTF-16LE")
+    )
+    logDebug("Finished loading data with read_csv.")
+    logInfo("Dim: ", dim(df_patient_raw))
+    logInfo("Columns: ", spec(df_patient_raw))
+
+    df_patient_raw
 }
 
 
@@ -219,7 +219,7 @@ read_raw_csv <- function(file) {
 #' @return A named character vector with all allowed provinces.
 #' @export
 get_allowed_provinces <- function() {
-  ## Should new countries and provinces be added, update the YAML file
-  provinces <- yaml::read_yaml("reference_data/provinces/allowed_provinces.yaml") |> unlist()
-  return(provinces)
+    ## Should new countries and provinces be added, update the YAML file
+    provinces <- yaml::read_yaml("reference_data/provinces/allowed_provinces.yaml") |> unlist()
+    return(provinces)
 }
 
diff --git a/scripts/run_pipeline.R b/scripts/run_pipeline.R
index fc338f3..2f29708 100644
--- a/scripts/run_pipeline.R
+++ b/scripts/run_pipeline.R
@@ -113,4 +113,3 @@ ingest_data(
     table = "clinic_data_static",
     source = file.path(table_dir, "clinic_data_static.parquet")
 )
-
diff --git a/scripts/run_script_3_create_tables.R b/scripts/run_script_3_create_tables.R
index 995c0bd..bfeae2e 100644
--- a/scripts/run_script_3_create_tables.R
+++ b/scripts/run_script_3_create_tables.R
@@ -11,117 +11,117 @@ patient_data_files <- a4d::get_files(file.path(paths$output_root, "patient_data_
 product_data_files <- a4d::get_files(file.path(paths$output_root, "product_data_cleaned"), pattern = "\\.parquet$")
 
 logInfo(
-  "Found ",
-  length(patient_data_files),
-  " patient parquet files under ",
-  paths$tracker_root,
-  "."
+    "Found ",
+    length(patient_data_files),
+    " patient parquet files under ",
+    paths$tracker_root,
+    "."
 )
 
 logInfo(
-  "Found ",
-  length(product_data_files),
-  " product parquet files under ",
-  paths$tracker_root,
-  "."
+    "Found ",
+    length(product_data_files),
+    " product parquet files under ",
+    paths$tracker_root,
+    "."
 )
 
 logInfo("Start creating table files.")
 
 logfile <- "table_patient_data_static"
 with_file_logger(logfile,
-  {
-    tryCatch(
-      {
-        a4d::create_table_patient_data_static(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables)
-      },
-      error = function(e) {
-        logError("Could not create table csv for static patient data. Error: ", e$message)
-      },
-      warning = function(w) {
-        logWarn("Could not create table csv for static patient data. Warning: ", w$message)
-      }
-    )
-  },
-  output_root = paths$output_root
Error: ", w$message) - } - ) - }, - output_root = paths$output_root + { + tryCatch( + { + a4d::create_table_patient_data_static(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables) + }, + error = function(e) { + logError("Could not create table csv for static patient data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create table csv for static patient data. Error: ", w$message) + } + ) + }, + output_root = paths$output_root ) logfile <- "table_patient_data_monthly" with_file_logger(logfile, - { - tryCatch( - { - a4d::create_table_patient_data_monthly(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables) - }, - error = function(e) { - logError("Could not create table csv for monthly patient data. Error: ", e$message) - }, - warning = function(w) { - logWarn("Could not create table csv for monthly patient data. Error: ", w$message) - } - ) - }, - output_root = paths$output_root + { + tryCatch( + { + a4d::create_table_patient_data_monthly(patient_data_files, file.path(paths$output_root, "patient_data_cleaned"), paths$tables) + }, + error = function(e) { + logError("Could not create table csv for monthly patient data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create table csv for monthly patient data. Error: ", w$message) + } + ) + }, + output_root = paths$output_root ) logfile <- "table_longitudinal_data_hba1c" with_file_logger(logfile, - { - tryCatch( - { - a4d::create_table_longitudinal_data( - patient_data_files, - file.path(paths$output_root, "patient_data_cleaned"), - paths$tables, - "hba1c_updated", - "hba1c" + { + tryCatch( + { + a4d::create_table_longitudinal_data( + patient_data_files, + file.path(paths$output_root, "patient_data_cleaned"), + paths$tables, + "hba1c_updated", + "hba1c" + ) + }, + error = function(e) { + logError("Could not create table csv for longitudinal patient data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create table csv for longitudinal patient data. Error: ", w$message) + } ) - }, - error = function(e) { - logError("Could not create table csv for longitudinal patient data. Error: ", e$message) - }, - warning = function(w) { - logWarn("Could not create table csv for longitudinal patient data. Error: ", w$message) - } - ) - }, - output_root = paths$output_root + }, + output_root = paths$output_root ) logfile <- "clinic_data_static" with_file_logger(logfile, - { - tryCatch( - { - a4d::export_data_as_parquet(data = read.csv("reference_data/clinic_data_static.csv"), filename = "clinic_data_static", output_root = paths$tables, suffix = "") - }, - error = function(e) { - logError("Could not create clinic data static table. Error: ", e$message) - }, - warning = function(w) { - logWarn("Could not create clinic data static table. Warning: ", w$message) - } - ) - }, - output_root = paths$output_root + { + tryCatch( + { + a4d::export_data_as_parquet(data = read.csv("reference_data/clinic_data_static.csv"), filename = "clinic_data_static", output_root = paths$tables, suffix = "") + }, + error = function(e) { + logError("Could not create clinic data static table. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create clinic data static table. 
Warning: ", w$message) + } + ) + }, + output_root = paths$output_root ) logfile <- "table_product_data" with_file_logger(logfile, - { - tryCatch( - { - a4d::create_table_product_data(file.path(paths$output_root, "product_data_cleaned"), paths$tables) - }, - error = function(e) { - logError("Could not create table for product data. Error: ", e$message) - }, - warning = function(w) { - logWarn("Could not create table for product data. Warning: ", w$message) - } - ) - }, - output_root = paths$output_root + { + tryCatch( + { + a4d::create_table_product_data(file.path(paths$output_root, "product_data_cleaned"), paths$tables) + }, + error = function(e) { + logError("Could not create table for product data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not create table for product data. Warning: ", w$message) + } + ) + }, + output_root = paths$output_root ) logInfo("Finish creating table files.") @@ -131,23 +131,23 @@ logInfo("Trying to link files for product and patient data.") logfile <- "link_product_patient_data" with_file_logger(logfile, - { - tryCatch( - { - a4d::link_product_patient( - file.path(paths$tables, "product_data.parquet"), - file.path(paths$tables, "patient_data_monthly.parquet") + { + tryCatch( + { + a4d::link_product_patient( + file.path(paths$tables, "product_data.parquet"), + file.path(paths$tables, "patient_data_monthly.parquet") + ) + }, + error = function(e) { + logError("Could not link files for product and patient data. Error: ", e$message) + }, + warning = function(w) { + logWarn("Could not link files for product and patient data. Warning: ", w$message) + } ) - }, - error = function(e) { - logError("Could not link files for product and patient data. Error: ", e$message) - }, - warning = function(w) { - logWarn("Could not link files for product and patient data. Warning: ", w$message) - } - ) - }, - output_root = paths$output_root + }, + output_root = paths$output_root ) logInfo("Finished linking files for product and patient data.") From aea0eeca7b449ae11907913e2c95b1f645a6e18a Mon Sep 17 00:00:00 2001 From: Luke Boelling Date: Thu, 4 Jan 2024 11:48:41 +0100 Subject: [PATCH 7/8] adding pipe operator with usethis readd sysdata.rda to R afet failing to use lazyload in data. 
---
 DESCRIPTION    |   3 ++-
 NAMESPACE      |   2 ++
 R/sysdata.rda  | Bin 0 -> 2763 bytes
 R/utils-pipe.R |  14 ++++++++++++++
 man/pipe.Rd    |  20 ++++++++++++++++++++
 5 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 R/sysdata.rda
 create mode 100644 R/utils-pipe.R
 create mode 100644 man/pipe.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 1ad85e1..bac3470 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -28,7 +28,8 @@ Imports:
     arrow,
     furrr,
     progressr,
-    tictoc
+    tictoc,
+    magrittr
 Suggests:
     testthat (>= 3.0.0)
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index c69c3f1..d7328b3 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export("%>%")
 export(calculate_most_frequent)
 export(convert_to)
 export(correct_decimal_sign)
@@ -42,3 +43,4 @@ export(setup_file_logger)
 export(setup_logger)
 export(with_file_logger)
 importFrom(data.table,"%like%")
+importFrom(magrittr,"%>%")
diff --git a/R/sysdata.rda b/R/sysdata.rda
new file mode 100644
index 0000000000000000000000000000000000000000..eb303cf804b77f1badf0b2d97168f0a3f6920e66
GIT binary patch
literal 2763
[base85-encoded binary payload (2763 bytes) omitted: garbled beyond recovery in this copy]

diff --git a/R/utils-pipe.R b/R/utils-pipe.R
new file mode 100644
--- /dev/null
+++ b/R/utils-pipe.R
@@ -0,0 +1,14 @@
+#' Pipe operator
+#'
+#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
+#'
+#' @name %>%
+#' @rdname pipe
+#' @keywords internal
+#' @export
+#' @importFrom magrittr %>%
+#' @usage lhs \%>\% rhs
+#' @param lhs A value or the magrittr placeholder.
+#' @param rhs A function call using the magrittr semantics.
+#' @return The result of calling `rhs(lhs)`.
+NULL
diff --git a/man/pipe.Rd b/man/pipe.Rd
new file mode 100644
index 0000000..a648c29
--- /dev/null
+++ b/man/pipe.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils-pipe.R
+\name{\%>\%}
+\alias{\%>\%}
+\title{Pipe operator}
+\usage{
+lhs \%>\% rhs
+}
+\arguments{
+\item{lhs}{A value or the magrittr placeholder.}
+
+\item{rhs}{A function call using the magrittr semantics.}
+}
+\value{
+The result of calling \code{rhs(lhs)}.
+}
+\description{
+See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
+}
+\keyword{internal}

From 3225c6349aa3cfb3dc456916afb8098da442d392 Mon Sep 17 00:00:00 2001
From: Michael Aydinbas
Date: Thu, 25 Jan 2024 20:09:23 +0100
Subject: [PATCH 8/8] add sysdata?
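
Note: R/sysdata.rda is the serialized store for internal (non-exported)
package data. A minimal sketch of how such a file is regenerated (the object
name below is hypothetical; the actual contents of sysdata.rda are not
visible in this patch):

    # hypothetical example: writes the named object(s) to R/sysdata.rda
    synonym_lookup <- data.frame(variable_name = "id", tracker_name = "ID")
    usethis::use_data(synonym_lookup, internal = TRUE, overwrite = TRUE)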
---
 R/sysdata.rda | Bin 2763 -> 2763 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/R/sysdata.rda b/R/sysdata.rda
index eb303cf804b77f1badf0b2d97168f0a3f6920e66..be6b002cff04d1329d0fdca86a74da91bb6a0978 100644
GIT binary patch
delta 19
XcmX>tdRmlAzMF#q445}^9pnN4Eu91g

delta 19
XcmX>tdRmlAzMF#q4A?eu9pnN4Evy6y
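
A closing observation on scripts/run_script_3_create_tables.R: the script
repeats the same with_file_logger()/tryCatch() guard around every step. A
possible follow-up refactor, sketched under the assumption that
with_file_logger() lazily evaluates its second argument in the calling frame
(as the usage above suggests); run_guarded_step is a hypothetical helper, not
part of these patches:

    run_guarded_step <- function(logfile, paths, expr, what) {
        with_file_logger(logfile,
            {
                tryCatch(
                    # `expr` is a promise; it is only forced here, inside the
                    # logging context, so its errors and warnings are captured
                    expr,
                    error = function(e) {
                        logError("Could not ", what, ". Error: ", e$message)
                    },
                    warning = function(w) {
                        logWarn("Could not ", what, ". Warning: ", w$message)
                    }
                )
            },
            output_root = paths$output_root
        )
    }

    # usage, mirroring one of the steps above:
    run_guarded_step(
        "table_product_data", paths,
        a4d::create_table_product_data(file.path(paths$output_root, "product_data_cleaned"), paths$tables),
        what = "create table for product data"
    )

Each script step then shrinks to a single call, keeping the log file name,
the step description, and the guarded expression together.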