From e4dd97fde2e394a977944d57677bc3857d2b115a Mon Sep 17 00:00:00 2001
From: Spinner <spinnerj@emea.roche.com>
Date: Wed, 22 Jan 2025 13:55:01 +0100
Subject: [PATCH] feat: create duplicates for slope plots

---
 NAMESPACE                |  1 +
 R/create_duplicates.R    | 95 ++++++++++++++++++++++++++++++++++++++++
 R/lambda_slope_plot.R    | 22 ++++++----
 man/create_duplicates.Rd | 49 +++++++++++++++++++++
 4 files changed, 158 insertions(+), 9 deletions(-)
 create mode 100644 R/create_duplicates.R
 create mode 100644 man/create_duplicates.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 540e2f87..9c7d315e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,6 +7,7 @@ export(apply_filters)
 export(apply_labels)
 export(as_factor_preserve_label)
 export(calculate_summary_stats)
+export(create_duplicates)
 export(create_start_impute)
 export(filter_breaks)
 export(flexible_violinboxplot)
diff --git a/R/create_duplicates.R b/R/create_duplicates.R
new file mode 100644
index 00000000..0a3bc06c
--- /dev/null
+++ b/R/create_duplicates.R
@@ -0,0 +1,95 @@
+#' Create duplicates in concentration data with Predose and Last Values for Dosing Cycles
+#'
+#' This function duplicates and adjusts concentration data to ensure all dosing cycles have
+#' complete predose and last concentration values. It is designed for use in pharmacokinetic
+#' analyses where dosing intervals and concentration values need to be aligned for each dose.
+#'
+#' @param conc_data A data frame containing concentration data.
+#' @param groups A character vector of column names to use for grouping (e.g., c("USUBJID", "ANALYTE", "PCSPEC")).
+#' @param dosno Column name for the dose number (default: "DOSNO").
+#' @param arrlt Column name for time from the most recent dose (default: "ARRLT").
+#' @param afrlt Column name for time from the first dose (default: "AFRLT").
+#' @param nrrlt Column name for the nominal time from the most recent dose (default: "NRRLT").
+#' @param nfrlt Column name for the nominal time from the first dose (default: "NFRLT").
+#'
+#' @return A data frame with adjusted concentration data, including:
+#'   - Duplicated predose values assigned to the previous dose.
+#'   - Duplicated last values assigned to the next dose if predose values are missing.
+#'   - Sorted by the grouping variables and relative time.
+#'
+#' @examples
+#' # Example usage
+#' result <- create_duplicates(conc_data, groups = c("USUBJID", "ANALYTE", "PCSPEC"), dosno = "DOSNO")
+#'
+#' @export
+create_duplicates <- function(conc_data,
+                              groups = c("USUBJID", "ANALYTE", "PCSPEC"),
+                              dosno = "DOSNO",
+                              arrlt = "ARRLT",
+                              afrlt = "AFRLT",
+                              nrrlt = "NRRLT",
+                              nfrlt = "NFRLT") {
+  needed <- c(groups, dosno, arrlt, afrlt, nrrlt, nfrlt)
+  absent <- setdiff(needed, names(conc_data))
+  if (length(absent) > 0) {
+    stop("Missing columns in conc_data: ", toString(absent), call. = FALSE)
+  }
+
+  dose_keys <- c(groups, dosno)
+  # Helper columns joined in below; stripped again before rows are returned
+  helper_cols <- c("dose_time", "interval_next", "interval_prev")
+
+  # Step 1: Derive each dose's administration time (AFRLT - ARRLT)
+  dose_times <- conc_data %>%
+    mutate(dose_time = .data[[afrlt]] - .data[[arrlt]]) %>%
+    group_by(across(all_of(dose_keys))) %>%
+    summarize(dose_time = first(dose_time), .groups = "drop")
+
+  # Step 2: Calculate dosing intervals (0 where there is no neighbouring dose)
+  dosing_intervals <- dose_times %>%
+    group_by(across(all_of(groups))) %>%
+    mutate(
+      interval_next = replace_na(lead(dose_time) - dose_time, 0),
+      interval_prev = replace_na(dose_time - lag(dose_time), 0)
+    ) %>%
+    ungroup()
+
+  # Step 3: Duplicate predose values as the last value of the previous dose
+  predose_duplicates <- conc_data %>%
+    filter(.data[[arrlt]] <= 0, .data[[dosno]] > 1) %>%
+    left_join(dosing_intervals, by = dose_keys) %>%
+    mutate(
+      !!dosno := .data[[dosno]] - 1,
+      !!arrlt := .data[[arrlt]] + interval_prev,
+      # Nominal time re-based on the previous dose (dose_time - interval_prev)
+      !!nrrlt := .data[[nfrlt]] - (dose_time - interval_prev)
+    ) %>%
+    select(-any_of(helper_cols))
+
+  # Step 4: Identify doses without a predose sample, then re-key them to the
+  # preceding dose, which donates its last value in Step 5
+  # NOTE(review): Step 3 counts ARRLT == 0 as predose, this uses < 0; confirm
+  missing_predose <- dose_times %>%
+    anti_join(filter(conc_data, .data[[arrlt]] < 0), by = dose_keys) %>%
+    mutate(!!dosno := .data[[dosno]] - 1)
+
+  # Step 5: Duplicate the last value of the previous dose as the predose of
+  # the dose lacking one, shifting times by the gap between the two doses
+  last_values <- conc_data %>%
+    semi_join(missing_predose, by = dose_keys) %>%
+    group_by(across(all_of(dose_keys))) %>%
+    slice_max(.data[[arrlt]], n = 1, with_ties = FALSE) %>%
+    ungroup() %>%
+    left_join(dosing_intervals, by = dose_keys) %>%
+    mutate(
+      !!dosno := .data[[dosno]] + 1,
+      !!arrlt := .data[[arrlt]] - interval_next,
+      !!nrrlt := .data[[nrrlt]] - interval_next
+    ) %>%
+    select(-any_of(helper_cols))
+
+  # Step 6: Combine original and duplicated rows, ordered within each dose
+  conc_data %>%
+    bind_rows(predose_duplicates, last_values) %>%
+    arrange(across(all_of(c(dose_keys, arrlt))))
+}
diff --git a/R/lambda_slope_plot.R b/R/lambda_slope_plot.R
index aa8b004d..c0a816d5 100644
--- a/R/lambda_slope_plot.R
+++ b/R/lambda_slope_plot.R
@@ -55,6 +55,7 @@ lambda_slope_plot <- function(
   R2ADJTHRESHOL = 0.7
 ) {
 
+  conc_pknca_df <- create_duplicates(conc_pknca_df)
   # Obtain all information relevant regarding lambda calculation
   lambda_res <- res_pknca_df %>%
     filter(DOSNO == dosno, USUBJID == usubjid, type_interval == "main")  %>%
@@ -67,16 +68,20 @@ lambda_slope_plot <- function(
 
   # Identify in the data the points used to calculate lambda
   lambda_z_ix_rows <- conc_pknca_df %>%
+    mutate(ARRLT = round(ARRLT, 3)) %>%
     filter(
       DOSNO == dosno,
       USUBJID == usubjid,
       !exclude_half.life,
-      TIME >= sum(
+      ARRLT >= round(
+        sum(
         subset(
           lambda_res,
           lambda_res$PPTESTCD == "lambda.z.time.first",
           select = c("start", "PPORRES")
-        )
+          )
+        ),
+        3
       )
     ) %>%
     arrange(IX) %>%
@@ -85,11 +90,10 @@ lambda_slope_plot <- function(
   # Calculate the base and adjusted fitness, half life and time span estimated
   r2_value <- signif(as.numeric(lambda_res$PPORRES[lambda_res$PPTESTCD == "r.squared"]), 3)
   r2adj_value <- signif(as.numeric(lambda_res$PPORRES[lambda_res$PPTESTCD == "adj.r.squared"]), 3)
-  half_life_value <- signif(
-    log(2) / as.numeric(lambda_res$PPORRES[lambda_res$PPTESTCD == "lambda.z"]), 3
+  half_life_value <- signif( as.numeric(lambda_res$PPORRES[lambda_res$PPTESTCD == "half.life"]), 3
   )
   time_span <- signif(
-    abs(lambda_z_ix_rows$TIME[nrow(lambda_z_ix_rows)] - lambda_z_ix_rows$TIME[1]), 3
+    abs(lambda_z_ix_rows$ARRLT[nrow(lambda_z_ix_rows)] - lambda_z_ix_rows$ARRLT[1]), 3
   )
 
   # Determine the color based on the conditions
@@ -157,7 +161,7 @@ lambda_slope_plot <- function(
 
   # Generate the base scatter ggplot
   p <- plot_data %>%
-    ggplot(aes(x = TIME, y = AVAL)) +
+    ggplot(aes(x = ARRLT, y = AVAL)) +
     geom_line(color = "gray70", linetype = "solid", linewidth = 1) +
     geom_smooth(
       data = subset(plot_data, IX_color == "hl.included"),
@@ -176,7 +180,7 @@ lambda_slope_plot <- function(
     labs(
       title = paste0("USUBJID: ", usubjid, ", DOSNO: ", dosno),
       y = paste0("Log10 Concentration (", conc_pknca_df $PCSTRESU[1], ")"),
-      x = paste0("Actual time post dose (", conc_pknca_df $RRLTU[1], ")")
+      x = paste0("Actual Time Post Dose (", conc_pknca_df $RRLTU[1], ")")
     ) +
     theme_bw() +
 
@@ -230,9 +234,9 @@ lambda_slope_plot <- function(
     # Make this trace the only one
     add_trace(
       data = plot_data %>% filter(DOSNO == dosno, USUBJID == usubjid),
-      x = ~TIME, y = ~log10(AVAL),
+      x = ~ARRLT, y = ~log10(AVAL),
       customdata = ~paste0(USUBJID, "_", DOSNO, "_", IX),
-      text = ~paste0("Data Point: ", IX, "\n", "(", signif(TIME, 2), " , ", signif(AVAL, 2), ")"),
+      text = ~paste0("Data Point: ", IX, "\n", "(", signif(ARRLT, 2), " , ", signif(AVAL, 2), ")"),
       type = "scatter",
       mode = "markers",
       name = "Data Points",
diff --git a/man/create_duplicates.Rd b/man/create_duplicates.Rd
new file mode 100644
index 00000000..bf2f5e0c
--- /dev/null
+++ b/man/create_duplicates.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/create_duplicates.R
+\name{create_duplicates}
+\alias{create_duplicates}
+\title{Create duplicates in concentration data with Predose and Last Values for Dosing Cycles}
+\usage{
+create_duplicates(
+  conc_data,
+  groups = c("USUBJID", "ANALYTE", "PCSPEC"),
+  dosno = "DOSNO",
+  arrlt = "ARRLT",
+  afrlt = "AFRLT",
+  nrrlt = "NRRLT",
+  nfrlt = "NFRLT"
+)
+}
+\arguments{
+\item{conc_data}{A data frame containing concentration data.}
+
+\item{groups}{A character vector of column names to use for grouping (e.g., c("USUBJID", "ANALYTE", "PCSPEC")).}
+
+\item{dosno}{Column name for the dose number (default: "DOSNO").}
+
+\item{arrlt}{Column name for time from the most recent dose (default: "ARRLT").}
+
+\item{afrlt}{Column name for time from the first dose (default: "AFRLT").}
+
+\item{nrrlt}{Column name for the nominal time from the most recent dose (default: "NRRLT").}
+
+\item{nfrlt}{Column name for the nominal time from the first dose (default: "NFRLT").}
+}
+\value{
+A data frame with adjusted concentration data, including:
+\itemize{
+\item Duplicated predose values assigned to the previous dose.
+\item Duplicated last values assigned to the next dose if predose values are missing.
+\item Sorted by the grouping variables and relative time.
+}
+}
+\description{
+This function duplicates and adjusts concentration data to ensure all dosing cycles have
+complete predose and last concentration values. It is designed for use in pharmacokinetic
+analyses where dosing intervals and concentration values need to be aligned for each dose.
+}
+\examples{
+# Example usage
+result <- create_duplicates(conc_data, groups = c("USUBJID", "ANALYTE", "PCSPEC"), dosno = "DOSNO")
+
+}