stefvanbuuren · vkhodygo · Nov 1, 2023 · Nov 1, 2023 · Nov 1, 2023 · Nov 1, 2023
diff --git a/R/fimd.R b/R/fimd.R
@@ -155,7 +155,7 @@ data <- complete(imp)
 Yobs <- airquality[, "Ozone"]
 Yimp <- data[, "Ozone"]
 mi.hist(Yimp, Yobs,
-        b = seq(-20, 200, 10), type = "continuous",
+        b = seq(-40, 200, 10), type = "continuous",
         gray = FALSE, lwd = lwd,
         obs.lwd = 1.5, mis.lwd = 1.5, imp.lwd = 1.5,
         obs.col = mdc(4),mis.col = mdc(5), imp.col = "transparent",
@@ -387,15 +387,15 @@ test.impute <- function(data, m = 5, method = "norm", ...) {
   imp <- mice(data, method = method, m = m, print = FALSE, ...)
   fit <- with(imp, lm(y ~ x))
   tab <- summary(pool(fit), "all", conf.int = TRUE)
-  as.numeric(tab["x", c("estimate", "2.5 %", "97.5 %")])
+  as.numeric(tab[2, c("estimate", "conf.low", "conf.high")])
 }
 
 ## ----lin.sim4------------------------------------------------------------
 simulate <- function(runs = 10) {
   res <- array(NA, dim = c(2, runs, 3))
   dimnames(res) <- list(c("norm.predict", "norm.nob"),
                         as.character(1:runs),
-                        c("estimate", "2.5 %","97.5 %"))
+                        c("estimate", "conf.low", "conf.high"))
   for(run in 1:runs) {
     data <- create.data(run = run)
     data <- make.missing(data)
@@ -416,8 +416,8 @@ apply(res, c(1, 3), mean, na.rm = TRUE)
 true <- 1
 RB <- rowMeans(res[,, "estimate"]) - true
 PB <- 100 * abs((rowMeans(res[,, "estimate"]) - true)/ true)
-CR <- rowMeans(res[,, "2.5 %"] < true & true < res[,, "97.5 %"])
-AW <- rowMeans(res[,, "97.5 %"] - res[,, "2.5 %"])
+CR <- rowMeans(res[,, "conf.low"] < true & true < res[,, "conf.high"])
+AW <- rowMeans(res[,, "conf.high"] - res[,, "conf.low"])
 RMSE <- sqrt(rowMeans((res[,, "estimate"] - true)^2))
 data.frame(RB, PB, CR, AW, RMSE)
 
@@ -1466,7 +1466,11 @@ imp1 <- mice(data, meth = meth, pred = pred, m = 10,
              print = FALSE)
 
 ## ----compos4-------------------------------------------------------------
-round(summary(pool(with(imp1, lm(Y3 ~ Y1 + Y2))))[, 1:2], 2)
+rapply(summary(pool(with(imp1, lm(Y3 ~ Y1 + Y2))))[, 1:2],
+       round,
+       classes = "numeric",
+       how = "replace",
+       digits = 2)
 
 ## ----composition, echo=FALSE, fig.width=7, fig.height=4---------------------
 meth <- make.method(data)
@@ -1551,7 +1555,7 @@ vis <- c("hgt", "wgt", "hc", "wgt.hc", "gen", "phb",
          "tv", "reg")
 expr <- expression((wgt - 40) * (hc - 50))
 boys$wgt.hc <- with(boys, eval(expr))
-imp.int2 <- mice(boys, m = 1, maxit = 1, visitSequence = vis,
+imp.int2 <- mice(boys, m = 1, maxit = 2, visitSequence = vis,
                  meth = imp.int$meth, pred = imp.int$pred,
                  seed = 23390)
 
@@ -1642,7 +1646,6 @@ pkg <- c("mice", "micemd", "lme4", "tidyr",
 loaded <- sapply(pkg, require, character.only = TRUE,
                  warn.conflicts = FALSE, quietly = TRUE)
 suppressPackageStartupMessages(library(miceadds, warn.conflicts = FALSE, quietly = TRUE))
-suppressPackageStartupMessages(library(DPpackage, warn.conflicts = FALSE, quietly = TRUE))
 suppressPackageStartupMessages(library(mitml, warn.conflicts = FALSE, quietly = TRUE))
 
 
@@ -1774,7 +1777,7 @@ grid.arrange(p.pan, p.pmm)
 
 ## ----toenail.2l.3, cache=TRUE--------------------------------------------
 library(tidyr)
-data("toenail", package = "DPpackage")
+data("toenail", package = "mice")
 data <- tidyr::complete(toenail, ID, visit) %>%
   tidyr::fill(treatment) %>%
   dplyr::select(-month)
@@ -1832,6 +1835,7 @@ print(tp1, newpage = FALSE)
 
 ## ----toenail.est, cache=TRUE---------------------------------------------
 library(purrr)
+library(broom.mixed)
 mice::complete(imp, "all") %>%
   purrr::map(lme4::glmer,
              formula = outcome ~ treatment * visit + (1 | ID),
@@ -2388,14 +2392,8 @@ xyplot(jitter(rrdiast, 10) ~ jitter(rrsyst, 10) | typ,
 dat <- cbind(data2, dead = 1 - data2$dwa)
 hazard <- nelsonaalen(dat, survda, dead)
 
-## ----c85nelsoncorrelation, eval=FALSE, echo=FALSE------------------------
-tmp <- data.frame(hazard, t = data2$survda,
-                  logt = log(data2$survda),
-                  SBP = data2$rrsyst, DBP = data2$rrdiast)
-round(cor(tmp, use = "pair"), 3)
-
 ## ----c85km, echo=FALSE, fig.height=4-------------------------------------
-library(survival)
+suppressPackageStartupMessages(library(survival))
 fit <- survfit(Surv(survda / 365, 1 - dwa) ~ is.na(rrsyst),
                data = data2)
 lwd <- 0.6
@@ -2541,21 +2539,6 @@ text(1:4, x = c(40, 28, 20, 32), y = c(4, 4, -4, -4), cex = 3)
 box(lwd = lwd)
 }
 
-## ----walkinginit, echo=FALSE---------------------------------------------
-library(mice)
-library(foreign)
-dataproject <- "Data/wad"
-.store <- file.path(dataproject,"R/store/")
-
-file <- file.path(dataproject, "original/walklink.sav")
-original <- read.spss(file=file, to.data.frame=TRUE)
-data <- original
-names(data) <- c("src","sex","age","sip1","haq8","grs9")
-data$src <- factor(data$src, levels=2:3, labels=c("Ergo","Euri"))
-data$sip1 <- as.factor(data$sip1)
-data$haq8 <- as.ordered(data$haq8)
-data$grs9 <- as.ordered(data$grs9)
-
 ## ----walkingimpute1------------------------------------------------------
 fA <- c(242, 43, 15, 0, 6)
 fB <- c(145, 110, 29, 8)
@@ -2633,16 +2616,10 @@ micemill <- function(n) {
                          as.numeric(YB[src=="A"]),
                          method="kendall"))
     tau <<- rbind(tau, ra(cors, s=TRUE))  # global assignment
-    means <- with(imp, mean(as.numeric(YA[src=="A"]), na.rm=TRUE))
-    thetaBA <<- rbind(thetaBA, ra(means, s=TRUE)-1)
     props <- with(imp, mean(YB[src=="A"]=='0'))
     thetaAB <<- rbind(thetaAB, ra(props, s=TRUE))
-    tabs <- with(imp, ftable(addmargins(
-                      table(YA[src=="A"],YB[src=="A"],
-                            useNA="ifany", dnn=c("YA","YB")))))
   }
 }
-thetaBA <- NULL
 
 ## ----walkingimpute10, cache = TRUE---------------------------------------
 tau <- NULL
@@ -2857,6 +2834,7 @@ imp <- mice(fdd, pred = fdd.pred, meth = meth, maxit = 20,
 
 ## ----fddreshape----------------------------------------------------------
 lowi <- mice::complete(imp, "long", inc=TRUE)
+lowi <- lowi[, c(ncol(lowi), ncol(lowi) - 1, 1:(ncol(lowi) - 2))]
 lowi <- data.frame(lowi,cbcl2=NA, cbin2=NA,cbex2=NA)
 lolo <- reshape(lowi, idvar = 'id',
                 varying = 11:ncol(lowi),
@@ -2920,52 +2898,6 @@ tp <- xyplot(yc~time|trt, data=means, type="o", pch=19,
        xlab="Time", xlim=c("T1","T2","T3"))
 print(tp)
 
-## ----tbcinit, echo=FALSE-------------------------------------------------
-load("data/tbc/imp.1745")
-load("data/tbc/fit.hgt")
-load("data/tbc/fit.wgt")
-load("data/tbc/fit.bmi")
-
-original <- read_spss(file = "data/tbc/long_spss2splus.sav")
-names(original) <- c("id", "age", "occ", "nocc", "sex",
-                    "hgt",  "wgt", "bmi",
-                    "hgt.z", "wgt.z", "bmi.z", "wfh.z", "sys140")
-broad <- read_spss(file = "data/tbc/broad_spss2splus.sav")
-broad <- as_factor(broad)
-names(broad) <- tolower(names(broad))
-target <- data.frame(id=broad$koppel, ao = broad$ovjv, bmi.z.jv = broad$b_sdjv)
-
-# merge the target variables
-original <- merge(original, target, all.x = TRUE)
-
-### define subsets
-subset <- !is.na(original$age) & !(is.na(original$hgt.z) & is.na(original$wgt.z) & is.na(original$bmi.z))
-tbc <- original[subset, c("id","occ","nocc","age","sex",
-                          "hgt.z","wgt.z","bmi.z","ao")]
-tbc <- tbc[order(tbc$id, tbc$age),]
-
-### define useful administrative variables
-ord <- tbc$id
-first <- diff(ord)>=1
-typ <- factor(rep("obs",nrow(tbc)),levels=c("obs","sup","pred"))
-tbc <- data.frame(tbc[,1:3],first=c(TRUE,first),typ,tbc[4:ncol(tbc)])
-
-### make small dataset
-six <- c(1259,7019,2447,7460,8046,7646)
-set.seed(23221)
-sample <- unique(c(six,sample(unique(tbc$id),300)))
-idx <- (tbc$id) %in% sample
-data <- tbc[idx,]
-
-### select all data
-data <- tbc
-
-### remove those with nocc 1 or 2 or 3
-data <- data[data$nocc >= 3,]
-
-### missing data heat map
-tbc <- data
-
 ## ----tbcspline-----------------------------------------------------------
 library(splines)
 data <- tbc
@@ -3043,15 +2975,17 @@ imp.1745 <- mice(data2, meth = meth, pred = pred, m = 10,
                  maxit = 10, seed = 52711, print = FALSE)
 
 ## ----tbcplotimp,  echo=FALSE---------------------------------------------
+imp.1745 <- mice(data2, meth = meth, pred = pred, m = 10,
+                 maxit = 10, seed = 52711, print = FALSE)
 cd <- mice::complete(imp.1745, "long")
 sup <- cd[cd$typ=="sup", ]
 # sup <- cd[cd$typ=="imp",-c(2,10:12)]
 # sup$ao <- NA
 data3 <- data.frame(.imp=0,data2)
-data3 <- rbind(data3, sup[,-2])
+data3 <- rbind(data3, sup[, -ncol(sup)])
 
 # prepare for plotting
-idx <- data3$id %in% six
+idx <- data3$id %in% c(1259,7019,2447,7460,8046,7646)
 pd  <- data3[idx,]
 pd$id <- as.factor(pd$id)
 pd$grp <- pd$.imp
@@ -3083,7 +3017,8 @@ print(tbcimp)
 ## ----tbchw, eval=TRUE,  echo=FALSE---------------------------------------
 ### prepare for plotting
 cd <- mice::complete(imp.1745, 1)
-idx <- (cd$id) %in% sample
+set.seed(23221)
+idx <- (cd$id) %in% unique(c(six,sample(unique(tbc$id),300)))
 cd <- cd[idx,]
 
 shingle <- cut(cd$age,breaks=c(brk,29.01),right=FALSE, inc=TRUE,

diff --git a/Rmd/03-univariate-missing.Rmd b/Rmd/03-univariate-missing.Rmd
@@ -1198,7 +1198,7 @@ uncertainty that needs to be incorporated into the imputations.
 5. Calculate $\dot\beta = \hat\beta + \dot z_1 V^{1/2}$.
 
 6. Calculate $n_0$ predicted probabilities 
-   $\dot p = 1 / (1+ \exp(-\Xmis\dot\beta))$.
+   $\dot p = 1 / (1+ \exp(-X_\mathrm{mis}\dot\beta))$.
 
 7. Draw $n_0$ random variates from the uniform distribution
    $U(0,1)$ in the vector $u$.
@@ -1726,8 +1726,8 @@ The pattern-mixture model [@GLYNN1986B; @LITTLE1993] decomposes the
 joint distribution $P(Y,R)$ as
 
 \begin{align}
- P(Y,R)&=& P(Y|R)P(R) (\#eq:pmmod1)\\ 
-       &=& P(Y|R=1)P(R=1) + P(Y|R=0)P(R=0) (\#eq:pmmod2)
+ P(Y,R)&= P(Y|R)P(R) (\#eq:pmmod1)\\ 
+       &= P(Y|R=1)P(R=1) + P(Y|R=0)P(R=0) (\#eq:pmmod2)
 \end{align}
 
 Compared to Equation \@ref(eq:selection) this model only reverses the

diff --git a/Rmd/07-multilevel-imputation.Rmd b/Rmd/07-multilevel-imputation.Rmd
@@ -860,7 +860,7 @@ The `toenail` data were collected in a randomized parallel group trial
 comparing two treatments for a common toenail infection. A total of
 294 patients were seen at seven visits, and severity of infection was
 dichotomized as “not severe” (0) and “severe” (1). The version of the
-data in the `DPpackage` is all numeric and easy to analyze. The
+data in the `mice` is all numeric and easy to analyze. The
 following statements load the data, and expand the data to the full
 design with $7 \times 294 = 2058$ rows. There are in total 150 missed
 visits.

diff --git a/Rmd/09-measurement.Rmd b/Rmd/09-measurement.Rmd
@@ -459,17 +459,17 @@ to add log($T$) as an additional predictor. This strong relation may
 have been a consequence of the design, as the frail people were
 measured first.
 
-               $H_0(T)$     $T$   log($T$)     SBP     DBP
+               $H_0(T)$     $T$   $\log(T)$    SBP     DBP
   ---------- ---------- ------- ---------- ------- -------
   $H_0(T)$        1.000   0.997      0.830   0.169   0.137
   $T$             0.997   1.000      0.862   0.176   0.141
-  log($T$)        0.830   0.862      1.000   0.205   0.151
+  $\log(T)$       0.830   0.862      1.000   0.205   0.151
   SBP             0.169   0.176      0.205   1.000   0.592
   DBP             0.137   0.141      0.151   0.592   1.000
   ---------- ---------- ------- ---------- ------- -------
 
 : (\#tab:c85hazardcor) Pearson correlations between the cumulative
-death hazard $H_0(T)$, survival time $T$, log($T$), systolic and
+death hazard $H_0(T)$, survival time $T$, $\log(T)$, systolic and
 diastolic blood pressure.
 
 ### Some guidance
@@ -848,7 +848,7 @@ predict too low. This section explains why this happens.
 
 (ref:plotexplain) Illustration of the bias of predictive equations. In
 general, the combined region 2 + 3b will have fewer cases than region
-4a. Thiscauses a downward bias in the prevalence estimate.
+4a. This causes a downward bias in the prevalence estimate.
 
 Figure \@ref(fig:plotexplain) plots the data of Figure
 \@ref(fig:plotbmi) in a different way. The figure is centered around
@@ -1399,6 +1399,8 @@ first 20 iterations (where we impute $Y_\mathrm{A}$ from
 $Y_\mathrm{B}$ and vice versa) we add age and sex as covariates, and
 do another 20 iterations. This goes as follows:
 
+```{r walkingimpute9, cache = TRUE, echo = FALSE}
+```
 ```{r walkingimpute10, cache = TRUE}
 ```
 

diff --git a/Rmd/10-selection.Rmd b/Rmd/10-selection.Rmd
@@ -43,7 +43,7 @@ in the first 28 days, and another 67 children died between the ages of
 28 days and 19 years, leaving 959 survivors at the age of 19 years.
 Intermediate outcome measures from earlier follow-ups were available
 for 89% of the survivors at age 14 ($n$ = 854), 77% at age 10 ($n$ =
-712), 84% at age 9($n$ = 813), 96% at age 5 ($n$ = 927) and 97% at age
+712), 84% at age 9 ($n$ = 813), 96% at age 5 ($n$ = 927) and 97% at age
 2 ($n$ = 946).
 
 To study the effect of drop-out, @HILLE2005 divided the 959 survivors

diff --git a/Rmd/11-longitudinal.Rmd b/Rmd/11-longitudinal.Rmd
@@ -586,8 +586,6 @@ according to the broken stick model. The multiply imputed values are
 then used to estimate difference scores and regression models that
 throw light on the question of scientific interest.
 
-```{r tbcinit, echo = FALSE}
-```
 
 ### Broken stick model$^\spadesuit$ {#sec:brokenstick}