From c223eb075940764a3c25cc382bd5c7ef03af6725 Mon Sep 17 00:00:00 2001
From: "Konrad H. Stopsack" <35557324+stopsack@users.noreply.github.com>
Date: Mon, 9 Oct 2023 09:35:35 -0400
Subject: [PATCH 1/9] document custom functions in FAQ

---
 vignettes/faq.Rmd       | 68 +++++++++++++++++++++++++++++++++++++++++
 vignettes/rifttable.Rmd | 34 ---------------------
 2 files changed, 68 insertions(+), 34 deletions(-)

diff --git a/vignettes/faq.Rmd b/vignettes/faq.Rmd
index 54bd207..a90f214 100644
--- a/vignettes/faq.Rmd
+++ b/vignettes/faq.Rmd
@@ -354,3 +354,71 @@ tribble(
 ```
 
 Patients with ECOG performance status of 2, compared to those with performance status 0, are on average 8% older (or 9%, if based on geometric means).
+
+# How do I make rifttable calculate an estimand that is not built-in?
+
+While the package provides a number of estimators commonly used in epidemiology, it will never be able to include all possible estimators. However, any custom estimate can be integrated into a rifttable by a defining custom estimation function.
+
+The subsequent example will reproduce the following basic rifttable, which shows the mean age by sex, stratified by ECOG performance status, in the `cancer` data set:
+
+```{r custom, message = FALSE}
+data(cancer, package = "survival")
+cancer <- cancer %>%
+  tibble::as_tibble() %>%
+  mutate(
+    sex = factor(
+      sex,
+      levels = 1:2,
+      labels = c("Male", "Female")))
+
+design <- tibble::tibble(
+  type = "mean",
+  exposure = "sex",
+  outcome = "age",
+  effect_modifier = "ph.ecog",
+  stratum = 1:2,
+  label = paste0("ECOG PS ", stratum, ": mean age"))
+
+design %>%
+  rifttable(
+    data = cancer,
+    overall = TRUE) %>%
+  rt_gt()
+```
+
+Instead of relying on rifttable's built-in estimator `type = "mean"`, we will define a custom function that calculates the mean:
+
+```{r custom_fn}
+estimate_my_mean <- function(data, ...) {
+  data %>%
+    group_by(.exposure) %>%
+    summarize(
+      res = paste(
+        round(
+          mean(.outcome),
+          digits = 3),
+        "yrs"))
+}
+```
+
+Use the custom function `my_mean` instead of the built-in `mean`:
+
+```{r custom_use}
+design %>%  # Edit the previous design
+  mutate(
+    type = "my_mean",  # Replace built-in "mean" by custom "my_mean"
+    label = paste0(label, " (custom)")) %>%
+  rifttable(
+    data = cancer,
+    overall = TRUE) %>%
+  rt_gt()
+```
+
+Specifications for custom functions:
+
+* The function name must start with `estimate_`; this prefix is to be omitted when later calling the custom function within `rifttable()`.
+* The `data` provided to `rifttable()` will be available to the custom function as an argument named `data`.
+* The `data` are already subsetted to the `stratum` of the `effect_modifier`, if applicable.
+* Copies of key variables are accessible under the names, `.exposure`, `.outcome`, `.event`, `.time`, and `.time2`, as applicable.
+* The function must accept all elements of a rifttable `design` (e.g., `confounders`,  `digits`, `na_rm`, etc.) and of the `rifttable()` function (e.g., `reference`, `risk_percent`) as arguments. Many may not be relevant and can be captured in the argument list, `...` (see example).
+* The function must return a tibble/data frame with one column for `.exposure`, with one row per exposure category, and one string column `res` for the estimate.
diff --git a/vignettes/rifttable.Rmd b/vignettes/rifttable.Rmd
index 8b2b2cc..3bbbfc0 100644
--- a/vignettes/rifttable.Rmd
+++ b/vignettes/rifttable.Rmd
@@ -216,37 +216,3 @@ tibble::tribble(
       dplyr::mutate(ph.ecog_factor = factor(ph.ecog))) %>% 
   rt_gt()
 ```
-
-
-# Example 7: Custom functions 
-
-This example also adds an overall estimate not stratified by exposure.
-
-```{r ex7}
-library(dplyr)  # for easier data handling
-
-# Define custom function, must start with "estimate_"
-estimate_my_example <- function(data, ...) {
-  # Variables have been renamed in '.exposure' and '.outcome'
-  data %>%
-    group_by(.exposure) %>%
-    summarize(
-      res = paste(
-        round(
-          mean(.outcome),
-          digits = 3)))
-}
-
-# Call custom function, omit "estimate_"
-tibble::tribble(
-  ~label,                    ~type,        ~stratum,
-  "Mean: Built-in function", "mean",       1,
-  "Mean: Custom function",   "my_example", 1) %>%
-  mutate(
-    exposure = "sex",
-    outcome = "status",
-    effect_modifier = "ph.ecog") %>%
-  rifttable(
-    data = cancer,
-    overall = TRUE)
-```

From f1f7043004f688c62c73d6650f5f9cf46ed005ad Mon Sep 17 00:00:00 2001
From: "Konrad H. Stopsack" <35557324+stopsack@users.noreply.github.com>
Date: Mon, 9 Oct 2023 09:37:08 -0400
Subject: [PATCH 2/9] FAQ: add changing confidence level

---
 vignettes/faq.Rmd | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/vignettes/faq.Rmd b/vignettes/faq.Rmd
index a90f214..06d78d1 100644
--- a/vignettes/faq.Rmd
+++ b/vignettes/faq.Rmd
@@ -334,6 +334,11 @@ In all models, after exponentiation, beta coefficients can be interpreted as rat
 In the `cancer` data (see previous question), ratios of usual (arithmetic) means of age could be considered informative, given that `hist(cancer$age)` etc. does not show a major skew in this outcome.
 
 ```{r table2_ratio}
+# How I do I change the level for confidence intervals?
+
+Add a `ci` column to the `design`:
+
+```{r change_ci}
 tribble(
   ~label,                                       ~type,
   "Observations",                               "total",
@@ -344,12 +349,22 @@ tribble(
   "  Ratio of arithmetric means",               "fold",
   "  Ratio of arithmetric means, empirical SE", "irrrob",
   "  Ratio of geometric means",                 "foldlog") %>%
+  ~label,            ~type,                   ~ci,
+  "Deaths/N (Risk)", "outcomes/total (risk)", NA,
+  "Risk ratio",      "",                      NA,
+  "  80% CI",        "rr",                    0.8,
+  "  95% CI",        "rr",                    NA,  # Defaults to 0.95
+  "  99% CI",        "rr",                    0.99) %>%
   mutate(
     exposure = "ph.ecog",
     outcome = "age") %>%
+    exposure = "stage",
+    outcome = "death") %>%
   rifttable(
     data = cancer, 
     ratio_digits = 3) %>%  # show extra digits to highlight (minor) differences
+    data = breastcancer,
+    risk_percent = TRUE) %>%
   rt_gt()  # obtain formatted output
 ```
 

From 88db543fbff2f3e2b224fe5b77e37ca688933b5e Mon Sep 17 00:00:00 2001
From: "Konrad H. Stopsack" <35557324+stopsack@users.noreply.github.com>
Date: Mon, 9 Oct 2023 09:38:57 -0400
Subject: [PATCH 3/9] document joint models in FAQ

---
 vignettes/estimators.Rmd | 10 ----------
 vignettes/faq.Rmd        | 25 +++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/vignettes/estimators.Rmd b/vignettes/estimators.Rmd
index ca7e18f..e0c0b79 100644
--- a/vignettes/estimators.Rmd
+++ b/vignettes/estimators.Rmd
@@ -95,16 +95,6 @@ knitr::opts_chunk$set(
 `"medsurv (ci)"` | Median survival with 95% confidence interval.
 
 
-# Joint models for comparative estimates
-
-By default, regression models will be fit separately for each stratum of the `effect_modifier`. 
-
-Append `"_joint"` to `"hr"`, `"rr"`, `"rd"`, `"irr"`, `"irrrob"`, `"diff"`, `"fold"`, `"foldlog"`, `"quantreg"`, or `"or"` to obtain "joint" models for exposure and effect modifier that have a single reference category.
- 
-Example: `type = "hr_joint"`.
-
-Note that the joint model will be fit across all non-missing (`NA`) strata of the effect modifier, even if the `design` table does not request all strata be shown.
-
 
 # Special types
 
diff --git a/vignettes/faq.Rmd b/vignettes/faq.Rmd
index 06d78d1..f326de3 100644
--- a/vignettes/faq.Rmd
+++ b/vignettes/faq.Rmd
@@ -289,15 +289,40 @@ rifttable(
 
 
 # How can I model differences in continous outcomes?
+# How can I create joint models?
 
 The following two example use the `cancer` data from the survival package. To compare patient age by ECOG performance status, first recode the variable `ph.ecog` to be categorical (a `factor`), and exclude patients with rare elevated `ecog.ps`:
+By default, regression models will be fit separately for each stratum of the `effect_modifier`. 
 
 ```{r setup}
 data(cancer, package = "survival")
+Append `"_joint"` to `"hr"`, `"rr"`, `"rd"`, `"irr"`, `"irrrob"`, `"diff"`, `"fold"`, `"foldlog"`, `"quantreg"`, or `"or"` to obtain "joint" models for exposure and effect modifier that have a single reference category.
+ 
+Note that the joint model will be fit across all non-missing (`NA`) strata of the effect modifier, even if the `design` table does not request all strata be shown.
 
 cancer <- cancer %>%
   filter(ph.ecog < 3) %>%
   mutate(ph.ecog = factor(ph.ecog))
+Compare stratified models to joint models for risk differences (for simplicity of presentation, count data are omitted):
+
+```{r joint}
+tribble(
+  ~label,                       ~type,      ~stratum,         
+  "**Overall**",                "rd",       c("Low", "High"), 
+  "",                           "",         "",               
+  "**Stratified models**",      "",         "",               
+  "  Low hormone receptor",     "rd",       "Low",           
+  "  High hormone receptor",    "rd",       "High",           
+  "",                           "",         "",               
+  "**Joint models**",           "",         "",               
+  "  Low hormone receptor",     "rd_joint", "Low",
+  "  High hormone receptor",    "rd_joint", "High") %>%
+  mutate(
+    exposure = "stage",
+    outcome = "death",
+    effect_modifier = "receptor") %>%
+  rifttable(data = breastcancer) %>%
+  rt_gt()
 ```
 
 

From fc4de59de6195a86ba77ad781b75545edc4acb99 Mon Sep 17 00:00:00 2001
From: "Konrad H. Stopsack" <35557324+stopsack@users.noreply.github.com>
Date: Mon, 9 Oct 2023 09:42:11 -0400
Subject: [PATCH 4/9] separate continuous outcomes (+ratios/differences)

---
 vignettes/estimators.Rmd            | 23 -------
 vignettes/estimators_continuous.Rmd | 99 +++++++++++++++++++++++++++++
 vignettes/faq.Rmd                   | 43 -------------
 3 files changed, 99 insertions(+), 66 deletions(-)
 create mode 100644 vignettes/estimators_continuous.Rmd

diff --git a/vignettes/estimators.Rmd b/vignettes/estimators.Rmd
index e0c0b79..26837c9 100644
--- a/vignettes/estimators.Rmd
+++ b/vignettes/estimators.Rmd
@@ -14,29 +14,6 @@ knitr::opts_chunk$set(
 )
 ```
 
-# Continuous outcomes
-
-## Comparative estimates with 95% confidence intervals
-
-`type` | Description | Options (`arguments = `)
--------+-------------+--------------
-`"diff"` | Mean difference from linear model. |
-`"fold"` | Fold change from generalized linear model with log link (i.e., ratio of arithmetic means).
-`"foldlog"` | Fold change from linear model after log transformation of the outcome (*i.e.*, ratio of geometric means).
-`"quantreg"` | Quantile difference from quantile regression using `quantreg::rq()` with `method = "fn"`. By default, this is the difference in medians. | `list(tau = 0.75)` to change the quantile to, e.g., the 75th percentile.
-   
-## Absolute estimates per exposure category
-
-`type` | Description | Options (`arguments = `)
--------+-------------+--------------
-`"range"` | Range: Minimum to maximum value.
-`"mean"` | Mean (arithmetic mean).
-`"mean (ci)"` | Mean and 95% CI.
-`"mean (sd)"` | Mean and standard deviation.
-`"geomean"` | Geometric mean.
-`"median"` | Median.
-`"median (iqr)"` | Median and interquartile range.
-
 
 # Binary outcomes
 
diff --git a/vignettes/estimators_continuous.Rmd b/vignettes/estimators_continuous.Rmd
new file mode 100644
index 0000000..66f5fb3
--- /dev/null
+++ b/vignettes/estimators_continuous.Rmd
@@ -0,0 +1,99 @@
+---
+title: "Continuous outcomes"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Continuous outcomes}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+# Example
+
+The example uses the `cancer` data from the survival package. To compare patient age by ECOG performance status, recode the variable `ph.ecog` to be categorical (a `factor`) and exclude patients with rare elevated `ecog.ps`:
+
+```{r ex_continuous, message = FALSE}
+library(dplyr)
+library(rifttable)
+data(cancer, package = "survival")
+cancer <- cancer %>%
+  filter(ph.ecog < 3) %>%
+  mutate(ph.ecog = factor(ph.ecog))
+attr(cancer$ph.ecog, which = "label") <- "ECOG performance status"
+
+tribble(
+  ~label,                                       ~type,
+  "**Absolute estimates**",                     "",
+  "Observations",                               "total",
+  "Range",                                      "range",
+  "Mean",                                       "", 
+  "  Mean (i.e., arithmetic mean)",             "mean",
+  "  Mean (95% CI)",                            "mean (ci)",
+  "  Mean (standard deviation)",                "mean (sd)",
+  "  Geometric mean",                           "geomean",
+  "Median",                                     "median",
+  "Median (interquartile range)",               "median (iqr)",
+  "",                                           "",
+  "**Comparative estimates**",                  "",
+  "Mean difference (95% CI)",                   "diff",
+  "Median difference (95% CI)",                 "quantreg",
+  "Mean ratio",                                 "",
+  "  of arithmetic means",                      "fold",
+  "  of arithmetic means, empirical SE",        "irrrob",
+  "  of geometric means",                       "foldlog") %>%
+  mutate(
+    exposure = "ph.ecog",
+    outcome = "age") %>%
+  rifttable(
+    data = cancer, 
+    diff_digits = 1,  # Suppress unnecessary precision in most estimates
+    # Show extraneous digits to highlight (minor) differences in ratio  models:
+    ratio_digits = 3,
+    overall = TRUE) %>%  
+  rt_gt()  # obtain formatted output
+```
+
+
+# Absolute estimates per exposure category
+
+`type` | Description | Options (`arguments = `)
+-------+-------------+--------------
+`"range"` | Range: Minimum to maximum value.
+`"mean"` | Mean (arithmetic mean).
+`"mean (ci)"` | Mean and CI (default: 95%).
+`"mean (sd)"` | Mean and standard deviation.
+`"geomean"` | Geometric mean.
+`"median"` | Median.
+`"median (iqr)"` | Median and interquartile range.
+
+
+# Comparative estimates with confidence intervals
+
+`type` | Description | Options (`arguments = `)
+-------+-------------+--------------
+`"diff"` | Mean difference from linear model. |
+`"fold"` | Fold change from generalized linear model with log link (i.e., ratio of arithmetic means).
+`"foldlog"` | Fold change from linear model after log transformation of the outcome (*i.e.*, ratio of geometric means).
+`"irrrob"` | Fold change from generalized linear model with Poisson distribution and log link and robust (sandwich) standard errors
+`"quantreg"` | Quantile difference from quantile regression using `quantreg::rq()` with `method = "fn"`. By default, this is the difference in medians. | `list(tau = 0.75)` to change the quantile to, e.g., the 75th percentile.
+   
+
+# More on ratios of continuous outcomes
+
+Three types of ratios for continuous outcomes are implemented in `rifttable()`:
+
+`type =` | Ratio of ... | Model | Use
+-------|----------|-----------------------|-----------------------------
+`"fold"` | Arithmetic means | Generalized linear model with Gaussian distribution and log link: `glm(y ~ x, family = gaussian(link = "log"))` | Tolerates `0` in the `outcome` variable. May be informative if outcome is normally distributed without transformation.
+`"irrrob"` | Arithmetic means | Generalized linear model with Poisson distribution and log link: `glm(y ~ x, family = poisson())`, with robust (sandwich) standard errors | Tolerates `0` in the `outcome` variable. May be informative if outcome is normally distributed without transformation.
+`"foldlog"` | Geometric means | Linear model with log-transformed outcome: `lm(log(y) ~ x)` | Does not tolerate `0` in the `outcome` variable, as `log(0)` is undefined (R returns `-Inf`). May be informative if outcome is normally distributed after log transformation.
+
+In all models, after exponentiation, beta coefficients can be interpreted as ratios. rifttable automatically does all necessary transformations.
+
+In the `cancer` data, ratios of usual (arithmetic) means of age could be considered informative, given that `hist(cancer$age)` etc. does not show a major skew in this outcome.
diff --git a/vignettes/faq.Rmd b/vignettes/faq.Rmd
index f326de3..60c12bb 100644
--- a/vignettes/faq.Rmd
+++ b/vignettes/faq.Rmd
@@ -288,21 +288,14 @@ rifttable(
 ```
 
 
-# How can I model differences in continous outcomes?
 # How can I create joint models?
 
-The following two example use the `cancer` data from the survival package. To compare patient age by ECOG performance status, first recode the variable `ph.ecog` to be categorical (a `factor`), and exclude patients with rare elevated `ecog.ps`:
 By default, regression models will be fit separately for each stratum of the `effect_modifier`. 
 
-```{r setup}
-data(cancer, package = "survival")
 Append `"_joint"` to `"hr"`, `"rr"`, `"rd"`, `"irr"`, `"irrrob"`, `"diff"`, `"fold"`, `"foldlog"`, `"quantreg"`, or `"or"` to obtain "joint" models for exposure and effect modifier that have a single reference category.
  
 Note that the joint model will be fit across all non-missing (`NA`) strata of the effect modifier, even if the `design` table does not request all strata be shown.
 
-cancer <- cancer %>%
-  filter(ph.ecog < 3) %>%
-  mutate(ph.ecog = factor(ph.ecog))
 Compare stratified models to joint models for risk differences (for simplicity of presentation, count data are omitted):
 
 ```{r joint}
@@ -326,54 +319,23 @@ tribble(
 ```
 
 
-Using `rifttable()`, one can calculate mean differences (`type = "diff"`) or quantile differences (`type = "quantreg"`, by default, at the median):
 
-```{r table2_diff}
 tribble(
-  ~label,                ~type,
-  "Observations",        "total",
-  "Mean age",            "mean",
-  "  Mean difference",   "diff",
-  "Median age",          "median",
-  "  Median difference", "quantreg") %>%
   mutate(
-    exposure = "ph.ecog",
-    outcome = "age") %>%
-  rifttable(data = cancer) %>%
-  rt_gt()  # obtain formatted output
 ```
 
 
-# How can I model ratios of continuous outcomes?
 
-Three types of ratios for continuous outcomes are implemented in `rifttable()`:
 
-`type =` | Ratio of ... | Model | Use
--------|----------|-----------------------|-----------------------------
-`"fold"` | Arithmetric means | Generalized linear model with Gaussian distribution and log link: `glm(y ~ x, family = gaussian(link = "log"))` | Tolerates `0` in the `outcome` variable. May be informative if outcome is normally distributed without transformation.
-`"irrrob"` | Arithmetric means | Generalized linear model with Poisson distribution and log link: `glm(y ~ x, family = poisson())`, with robust (sandwich) standard errors | Tolerates `0` in the `outcome` variable. May be informative if outcome is normally distributed without transformation.
-`"foldlog"` | Geometric means | Linear model with log-transformed outcome: `lm(log(y) ~ x)` | Does not tolerate `0` in the `outcome` variable, as `log(0)` is undefined (R returns `-Inf`). May be informative if outcome is normally distributed after log transformation.
 
-In all models, after exponentiation, beta coefficients can be interpreted as ratios. rifttable automatically does all necessary transformations.
 
-In the `cancer` data (see previous question), ratios of usual (arithmetic) means of age could be considered informative, given that `hist(cancer$age)` etc. does not show a major skew in this outcome.
 
-```{r table2_ratio}
 # How I do I change the level for confidence intervals?
 
 Add a `ci` column to the `design`:
 
 ```{r change_ci}
 tribble(
-  ~label,                                       ~type,
-  "Observations",                               "total",
-  "Mean age",                                   "", 
-  "  Arithmetic mean",                          "mean",
-  "  Geometric mean",                           "geomean",
-  "Mean ratio",                                 "",
-  "  Ratio of arithmetric means",               "fold",
-  "  Ratio of arithmetric means, empirical SE", "irrrob",
-  "  Ratio of geometric means",                 "foldlog") %>%
   ~label,            ~type,                   ~ci,
   "Deaths/N (Risk)", "outcomes/total (risk)", NA,
   "Risk ratio",      "",                      NA,
@@ -381,19 +343,14 @@ tribble(
   "  95% CI",        "rr",                    NA,  # Defaults to 0.95
   "  99% CI",        "rr",                    0.99) %>%
   mutate(
-    exposure = "ph.ecog",
-    outcome = "age") %>%
     exposure = "stage",
     outcome = "death") %>%
   rifttable(
-    data = cancer, 
-    ratio_digits = 3) %>%  # show extra digits to highlight (minor) differences
     data = breastcancer,
     risk_percent = TRUE) %>%
   rt_gt()  # obtain formatted output
 ```
 
-Patients with ECOG performance status of 2, compared to those with performance status 0, are on average 8% older (or 9%, if based on geometric means).
 
 # How do I make rifttable calculate an estimand that is not built-in?
 

From 13204d73b21c9ae2fcca1c92b3d8b26ae02823ba Mon Sep 17 00:00:00 2001
From: "Konrad H. Stopsack" <35557324+stopsack@users.noreply.github.com>
Date: Mon, 9 Oct 2023 09:42:50 -0400
Subject: [PATCH 5/9] separate binary outcomes

---
 vignettes/estimators.Rmd        | 23 -----------
 vignettes/estimators_binary.Rmd | 71 +++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 23 deletions(-)
 create mode 100644 vignettes/estimators_binary.Rmd

diff --git a/vignettes/estimators.Rmd b/vignettes/estimators.Rmd
index 26837c9..45723fb 100644
--- a/vignettes/estimators.Rmd
+++ b/vignettes/estimators.Rmd
@@ -15,29 +15,6 @@ knitr::opts_chunk$set(
 ```
 
 
-# Binary outcomes
-
-## Comparative estimates with 95% confidence intervals
-
-`type` | Description | Options (`arguments = `)
--------+-------------+--------------
-`"irr"`   | Incidence rate ratio for count outcomes from Poisson regression model.
-`"irrrob"` | Ratio for other outcomes from Poisson regression model with robust (sandwich) standard errors.
-`"or"` | Odds ratio from logistic regression.
-`"rd"` | Risk difference (or prevalence difference) from risks::riskdiff(). | `list(approach = "margstd_boot", bootrepeats = 2000)` to request model fitting via marginal standardization with 2000 bootstraps.
-`"rr"` | Risk ratio (or prevalence ratio) from `risks::riskratio()`. | See `"rd"`.
-
-## Absolute estimates per exposure category
-
-`type` | Description | Options (`arguments = `)
--------+-------------+--------------
-`"cases/controls"` | Cases and non-cases (events and non-events); useful for case-control studies.
-`"outcomes"` | Outcome count.
-`"outcomes (risk)"` | A combination: Outcomes followed by risk in parentheses.
-`"outcomes/total (risk)"` | A combination: Outcomes slash total followed by risk in parentheses.
-`"risk"` | Risk (or prevalence), calculated as a proportion, *i.e.*, outcomes divided by number of observations. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`.
-`"risk (ci)"` | Risk with 95% confidence interval (Wilson score interval for binomial proportions, see `rifttable::scoreci()`).
-
 
 # Time-to-event/survival outcomes
 
diff --git a/vignettes/estimators_binary.Rmd b/vignettes/estimators_binary.Rmd
new file mode 100644
index 0000000..af7f86c
--- /dev/null
+++ b/vignettes/estimators_binary.Rmd
@@ -0,0 +1,71 @@
+---
+title: "Binary outcomes"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Binary outcomes}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+# Example
+
+The example uses the `breastcancer` data set from the risks package and compares the risk of death (a binary variable in this case) by categories of cancer stage.
+
+```{r example, message = FALSE}
+library(rifttable)
+library(dplyr)
+data(breastcancer, package = "risks")
+
+tibble::tribble(
+  ~label,                      ~type,
+  "**Absolute estimates**",    "",
+  "Observations",              "total",
+  "Outcomes",                  "outcomes",
+  "Outcomes/Total",            "outcomes/total",
+  "Cases/Controls",            "cases/controls",
+  "Risk",                      "risk",
+  "Risk (95% CI)",             "risk (ci)",
+  "Outcomes (Risk)",           "outcomes (risk)",
+  "Outcomes/Total (Risk)",     "outcomes/total (risk)",
+  "",                          "",
+  "**Comparative estimates**", "",
+  "Risk ratio (95% CI)",       "rr",
+  "Risk difference (95% CI)",  "rd",
+  "Odds ratio (95% CI)",       "or") %>%
+  mutate(
+    exposure = "stage",
+    outcome = "death") %>%
+  rifttable(
+    data = breastcancer,
+    overall = TRUE) %>%
+  rt_gt()  # Formatted output
+```
+
+
+# Absolute estimates per exposure category
+
+`type` | Description | Options (`arguments = `)
+-------+-------------+--------------
+`"cases/controls"` | Cases and non-cases (events and non-events); useful for case-control studies.
+`"outcomes"` | Outcome count.
+`"outcomes (risk)"` | A combination: Outcomes followed by risk in parentheses.
+`"outcomes/total (risk)"` | A combination: Outcomes slash total followed by risk in parentheses.
+`"risk"` | Risk (or prevalence), calculated as a proportion, *i.e.*, outcomes divided by number of observations. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`.
+`"risk (ci)"` | Risk with confidence interval (default: 95%): Wilson score interval for binomial proportions, see `rifttable::scoreci()`.
+
+
+# Comparative estimates with confidence intervals
+
+`type` | Description | Options (`arguments = `)
+-------+-------------+--------------
+`"irr"` | Incidence rate ratio for count outcomes from Poisson regression model, with confidence interval (default: 95%).
+`"or"` | Odds ratio from logistic regression, with confidence interval (default: 95%).
+`"rd"` | Risk difference (or prevalence difference) from risks::riskdiff(), with confidence interval (default: 95%). | `list(approach = "margstd_boot", bootrepeats = 2000)` to request model fitting via marginal standardization with 2000 bootstraps.
+`"rr"` | Risk ratio (or prevalence ratio) from `risks::riskratio()`, with confidence interval (default: 95%). | See `"rd"`.

From b58dad026287dbfed0ed3b0b4a1a4ed342ca0c00 Mon Sep 17 00:00:00 2001
From: "Konrad H. Stopsack" <35557324+stopsack@users.noreply.github.com>
Date: Mon, 9 Oct 2023 09:43:35 -0400
Subject: [PATCH 6/9] separate survival outcomes

---
 vignettes/estimators.Rmd          |  38 -----------
 vignettes/estimators_survival.Rmd | 105 ++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 38 deletions(-)
 create mode 100644 vignettes/estimators_survival.Rmd

diff --git a/vignettes/estimators.Rmd b/vignettes/estimators.Rmd
index 45723fb..1a76b82 100644
--- a/vignettes/estimators.Rmd
+++ b/vignettes/estimators.Rmd
@@ -14,42 +14,6 @@ knitr::opts_chunk$set(
 )
 ```
 
-
-
-# Time-to-event/survival outcomes
-
-## Comparative estimates with 95% confidence intervals
-
-`type` | Description | Options (`arguments = `)
--------+-------------+--------------
-`"cumincdiff"` | Difference in cumulative incidence from Kaplan-Meier estimator. Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in cumulative incidence at 2.5 years.
-`"hr"`    | Hazard ratio from Cox proportional hazards regression. | `list(robust = TRUE)` for robust (sandwich) standard errors; `list(weights = "weight_variable", robust = TRUE)` for sampling (e.g., inverse-probability) weights and robust errors. Use `"+ cluster(id_variable)"` in `confounders` to obtain clustering.
-`"survdiff"` | Difference in survival from Kaplan-Meier estimator.  Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in survival at 2.5 years.
-
-   
-## Absolute estimates per exposure category
-
-`type` | Description | Options (`arguments = `)
--------+-------------+--------------
-`"cuminc"` | Cumulative incidence ("risk") from the Kaplan-Meier estimator. If no time point is provided, returns cumulative incidence at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show cumulative incidence at 2.5 years.
-`"cuminc (ci)"` | Cumulative incidence ("risk") from the Kaplan-Meier estimator with 95% confidence intervals (Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"cuminc"`.
-`"events"` | Event count.
-`"events/time"` | Events slash person-time.
-`"events/time (rate)"` | A combination: Events slash time followed by rate in parentheses.
-`"events/total"` | Events slash number of observations.
-`"rate"` | Event rate: event count divided by person-time, multiplied by the `rifttable()` parameter `factor`.
-`"rate (ci)"` | Event rate with 95% confidence interval (Poisson-type large-sample interval).
-`"surv"` | Survival from the Kaplan-Meier estimator. If no time point is provided, returns survival at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show survival at 2.5 years.
-`"surv (ci)"` | Survival from the Kaplan-Meier estimator with 95% confidence interval (Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"surv"`.
-`"time"` | Person-time.
-`"maxfu"` | Maximum follow-up time.
-`"medfu"` | Median follow-up (reverse Kaplan-Meier), equals median survival for censoring.
-`"medfu (iqr)"` | Median and interquartile range for follow-up.
-`"medsurv"` | Median survival.
-`"medsurv (ci)"` | Median survival with 95% confidence interval.
-
-
-
 # Special types
 
 `type` | Description | Options (`arguments = `)
@@ -57,5 +21,3 @@ knitr::opts_chunk$set(
 `"blank"` or `""` | An empty line
 `"custom_function"` | A user-defined function. In this example, the corresponding function must be called `estimate_custom_function()` | Any
 `"total"` | Number of observations.
-
-The reference categories for exposure and effect modifier are their first factor levels, which can be changed using `forcats::fct_relevel()`. 
diff --git a/vignettes/estimators_survival.Rmd b/vignettes/estimators_survival.Rmd
new file mode 100644
index 0000000..ab07f6b
--- /dev/null
+++ b/vignettes/estimators_survival.Rmd
@@ -0,0 +1,105 @@
+---
+title: "Survival outcomes"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Survival outcomes}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+# Example
+
+The example uses the `cancer` data set from the survival package and compares survival, defined by the time variable `time` (recoded to years) and the event variable `status` (recoded to 1 = death, 0 = censored), by sex.
+
+```{r example}
+library(rifttable)
+data(cancer, package = "survival")
+
+cancer <- cancer %>%
+  tibble::as_tibble() %>%
+  dplyr::mutate(
+    # The exposure (here, 'sex') must be categorical (a factor)
+    sex = factor(
+      sex,
+      levels = 1:2,
+      labels = c("Male", "Female")),
+    time = time / 365.25,  # transform to years
+    status = status - 1)
+
+tibble::tribble(
+  ~label,                                  ~type,
+  "**Absolute estimates**",                "",
+  "*Counts and sums*",                     "",
+  "  Observations, *N*",                   "total",
+  "  Events, *n*",                         "events",
+  "  Events/observations",                 "events/total",
+  "  Events/person-years",                 "events/time",
+  "*Follow-up*",                           "",
+  "  Person-years",                        "time",
+  "  Maximum follow-up, years",            "maxfu",
+  "  Median follow-up, years",             "medfu",
+  "  Median follow-up (IQR), years",       "medfu (iqr)",
+  "*Rates*",                               "",
+  "  Rate per 1000 person-years",          "rate",
+  "  Rate per 1000 person-years (95% CI)", "rate (ci)",
+  "  Events/py (rate per 1000 py)",        "events/time (rate)",
+  "*Risks*",                               "",
+  "  1-year survival",                     "surv",
+  "  1-year survival (95% CI)",            "surv (ci)",
+  "  1-year risk/cumulative incidence",    "cuminc",
+  "  1-year risk (95% CI)",                "cuminc (ci)",
+  "  Median survival, years",              "medsurv",
+  "  Median survival (95 CI), years",      "medsurv (ci)",
+  "",                                      "",
+  "**Comparative estimates**",             "",
+  "Difference in 1-year survival",         "survdiff",
+  "Difference in 1-year risk",             "cumincdiff",
+  "Hazard ratio (95% CI)",                 "hr") %>%
+  dplyr::mutate(
+    time = "time", 
+    event = "status", 
+    exposure = "sex",
+    arguments = list(list(timepoint = 1))) %>%
+  rifttable(
+    data = cancer, 
+    overall = TRUE) %>%
+  rt_gt()
+```
+
+
+# Absolute estimates per exposure category
+
+`type` | Description | Options (`arguments = `)
+-------+-------------+--------------
+`"cuminc"` | Cumulative incidence ("risk") from the Kaplan-Meier estimator. If no time point is provided, returns cumulative incidence at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show cumulative incidence at 2.5 years.
+`"cuminc (ci)"` | Cumulative incidence ("risk") from the Kaplan-Meier estimator with confidence intervals (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"cuminc"`.
+`"events"` | Event count.
+`"events/time"` | Events slash person-time.
+`"events/time (rate)"` | A combination: Events slash time followed by rate in parentheses.
+`"events/total"` | Events slash number of observations.
+`"rate"` | Event rate: event count divided by person-time, multiplied by the `rifttable()` parameter `factor`.
+`"rate (ci)"` | Event rate with confidence interval (default: 95%; Poisson-type large-sample interval).
+`"surv"` | Survival from the Kaplan-Meier estimator. If no time point is provided, returns survival at end of follow-up. Change between display as proportion or percent using the parameter `risk_percent` of `rifttable()`. | `list(timepoint = 2.5)` to show survival at 2.5 years.
+`"surv (ci)"` | Survival from the Kaplan-Meier estimator with confidence interval (default: 95%; Greenwood standard errors with log transformation, the default of the survival package/`survival::survfit()`). | See `"surv"`.
+`"time"` | Person-time.
+`"maxfu"` | Maximum follow-up time.
+`"medfu"` | Median follow-up (reverse Kaplan-Meier), equals median survival for censoring.
+`"medfu (iqr)"` | Median and interquartile range for follow-up.
+`"medsurv"` | Median survival.
+`"medsurv (ci)"` | Median survival with confidence interval (default: 95%).
+
+
+# Comparative estimates with confidence intervals
+
+`type` | Description | Options (`arguments = `)
+-------+-------------+--------------
+`"cumincdiff"` | Difference in cumulative incidence from Kaplan-Meier estimator, with confidence interval (default: 95%). Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in cumulative incidence at 2.5 years.
+`"hr"`    | Hazard ratio from Cox proportional hazards regression, with confidence interval (default: 95%). | `list(robust = TRUE)` for robust (sandwich) standard errors; `list(weights = "weight_variable", robust = TRUE)` for sampling (e.g., inverse-probability) weights and robust errors. Use `"+ cluster(id_variable)"` in `confounders` to obtain clustering.
+`"survdiff"` | Difference in survival from Kaplan-Meier estimator, with confidence interval (default: 95%).  Cannot not handle confounders. Uses `rifttable::survdiff_ci()`. | `list(timepoint = 2.5)` to evaluate differences in survival at 2.5 years.

From ca7b13d2737b171cf94ff2d48ce405a378ff3bd0 Mon Sep 17 00:00:00 2001
From: "Konrad H. Stopsack" <35557324+stopsack@users.noreply.github.com>
Date: Mon, 9 Oct 2023 09:45:01 -0400
Subject: [PATCH 7/9] how to change reference category

---
 vignettes/faq.Rmd | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/vignettes/faq.Rmd b/vignettes/faq.Rmd
index 60c12bb..3b630e3 100644
--- a/vignettes/faq.Rmd
+++ b/vignettes/faq.Rmd
@@ -319,15 +319,57 @@ tribble(
 ```
 
 
+# How can I change the reference category?
 
+The reference categories for exposure and effect modifier are always their first factor levels. Compare the preceding example: `"High"` is, alphabetically, before `"Low"`. To change the reference category, use `forcats::fct_relevel()` or the base R alternative `relevel()` on variables in the `data` provided to `rifttable()`: 
+
+```{r joint_reorder}
 tribble(
+  ~label,                       ~type,      ~stratum,         
+  "**Joint models**",           "",         "",               
+  "  Low hormone receptor",     "rd_joint", "Low",
+  "  High hormone receptor",    "rd_joint", "High") %>%
   mutate(
+    exposure = "stage",
+    outcome = "death",
+    effect_modifier = "receptor") %>%
+  rifttable(
+    data = breastcancer %>%
+      mutate(
+        receptor = relevel(
+          factor(receptor),  # Make "receptor" a factor in the first place
+          ref = "Low"))) %>%  # Set new reference category
+  rt_gt()
 ```
 
+If a middle category of the exposure `stage` is desired as the reference:
+
+```{r reorder_middle}
+result_reordered <- tibble(
+  label = "**RD (95% CI)**",
+  type = "rd",
+  exposure = "stage",
+  outcome = "death") %>%
+  rifttable(
+    data = breastcancer %>%
+      mutate(
+        stage = relevel(
+          stage,
+          ref = "Stage II")))
 
+result_reordered %>%
+  rt_gt()
+```
 
+Using `forcats::fct_relevel()` may be preferable over `relevel()`, as it preserves the variable label: here, the variable `stage` lost its label, `"Stage"` starting with upper case S.
 
+That results for "Stage II" are now listed first is probably undesirable. Reorder the columns of the table that `rifttable()` produced to print results for "Stage I" first:
 
+```{r reorder_middle2}
+result_reordered %>%
+  select(stage, "Stage I", everything()) %>%
+  rt_gt()
+```
 
 
 # How I do I change the level for confidence intervals?

From 7339863f94e30a83638cc48f4691d0618aa78ef2 Mon Sep 17 00:00:00 2001
From: "Konrad H. Stopsack" <35557324+stopsack@users.noreply.github.com>
Date: Mon, 9 Oct 2023 09:45:16 -0400
Subject: [PATCH 8/9] remove unused effect modifier

---
 vignettes/faq.Rmd | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/vignettes/faq.Rmd b/vignettes/faq.Rmd
index 3b630e3..147ba89 100644
--- a/vignettes/faq.Rmd
+++ b/vignettes/faq.Rmd
@@ -232,7 +232,6 @@ By default, difference measures are being rounded to 2 decimal digits (`0.01`),
 
 Rounding can be changed by setting the `rifttable()` arguments `diff_digits`, `risk_digits`, and `ratio_digits` globally for the entire table. 
 
-
 ```{r rounding}
 design <- tribble(
   ~label,                     ~type,
@@ -243,8 +242,7 @@ design <- tribble(
   "Risk difference (95% CI)", "rd") %>%
   mutate(
     exposure = "stage",
-    outcome = "death",
-    effect_modifier = "receptor")
+    outcome = "death")
 
 rifttable(
   design = design,
@@ -279,8 +277,7 @@ tribble(
   "Risk difference (95% CI)", "rd",             3) %>%  # Overrides risk_digits
   mutate(
     exposure = "stage",
-    outcome = "death",
-    effect_modifier = "receptor") %>%
+    outcome = "death") %>%
 rifttable(
   data = breastcancer,
   risk_digits = 1) %>%  # Fewer digits for risks, unless specified by "digits"

From 6734389ad575097cf4ea55690bda422dd3d5916b Mon Sep 17 00:00:00 2001
From: "Konrad H. Stopsack" <35557324+stopsack@users.noreply.github.com>
Date: Mon, 9 Oct 2023 09:45:38 -0400
Subject: [PATCH 9/9] restructure site navigation

---
 _pkgdown.yml             | 33 ++++++++++++++++++++++++++-------
 vignettes/estimators.Rmd | 23 -----------------------
 2 files changed, 26 insertions(+), 30 deletions(-)
 delete mode 100644 vignettes/estimators.Rmd

diff --git a/_pkgdown.yml b/_pkgdown.yml
index 4baa981..e5f5cb6 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -2,10 +2,29 @@ use_url: ~
 template:
   bootstrap: 5
 
-articles:
-- title: Main
-  navbar: ~
-  contents:
-  - faq
-  - table1
-  - estimators
+navbar:
+  structure:
+    left:  [intro, faq, tables, more]
+    right: [search, github]
+  components:
+   faq:
+    text: FAQ
+    href: articles/faq.html
+   tables:
+    text: Example Tables
+    menu:
+    - text: A Descriptive Table 1
+      href: articles/table1.html
+    - text: Binary Outcomes
+      href: articles/estimators_binary.html
+    - text: Continuous Outcomes
+      href: articles/estimators_continuous.html
+    - text: Survival Outcomes
+      href: articles/estimators_survival.html
+   more:
+    text: More
+    menu:
+    - text: Function reference
+      href: reference/index.html
+    - text: Changelog
+      href: news/index.html
diff --git a/vignettes/estimators.Rmd b/vignettes/estimators.Rmd
deleted file mode 100644
index 1a76b82..0000000
--- a/vignettes/estimators.Rmd
+++ /dev/null
@@ -1,23 +0,0 @@
----
-title: "Available estimators ('type')"
-output: rmarkdown::html_vignette
-vignette: >
-  %\VignetteIndexEntry{Available estimators ('type')}
-  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
----
-
-```{r, include = FALSE}
-knitr::opts_chunk$set(
-  collapse = TRUE,
-  comment = "#>"
-)
-```
-
-# Special types
-
-`type` | Description | Options (`arguments = `)
--------+-------------+--------------
-`"blank"` or `""` | An empty line
-`"custom_function"` | A user-defined function. In this example, the corresponding function must be called `estimate_custom_function()` | Any
-`"total"` | Number of observations.