From 888df6a87c233f0325c9e90a46e6635c76e784ea Mon Sep 17 00:00:00 2001 From: Vladimir Khodygo Date: Wed, 1 Nov 2023 16:10:26 +0000 Subject: [PATCH 01/11] fix latex formatting in algorithm 3.5 --- Rmd/03-univariate-missing.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Rmd/03-univariate-missing.Rmd b/Rmd/03-univariate-missing.Rmd index 0c75f95..f01e600 100644 --- a/Rmd/03-univariate-missing.Rmd +++ b/Rmd/03-univariate-missing.Rmd @@ -1198,7 +1198,7 @@ uncertainty that needs to be incorporated into the imputations. 5. Calculate $\dot\beta = \hat\beta + \dot z_1 V^{1/2}$. 6. Calculate $n_0$ predicted probabilities - $\dot p = 1 / (1+ \exp(-\Xmis\dot\beta))$. + $\dot p = 1 / (1+ \exp(-X_\mathrm{mis}\dot\beta))$. 7. Draw $n_0$ random variates from the uniform distribution $U(0,1)$ in the vector $u$. From d8e37ec365ec8b81633ae197f693751715ddb73d Mon Sep 17 00:00:00 2001 From: Vladimir Khodygo Date: Wed, 1 Nov 2023 17:00:08 +0000 Subject: [PATCH 02/11] close #2, fix column name discrepancy --- R/fimd.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/fimd.R b/R/fimd.R index ab262e3..7e43c43 100644 --- a/R/fimd.R +++ b/R/fimd.R @@ -387,7 +387,7 @@ test.impute <- function(data, m = 5, method = "norm", ...) { imp <- mice(data, method = method, m = m, print = FALSE, ...) fit <- with(imp, lm(y ~ x)) tab <- summary(pool(fit), "all", conf.int = TRUE) - as.numeric(tab["x", c("estimate", "2.5 %", "97.5 %")]) + as.numeric(tab[2, c("estimate", "conf.low", "conf.high")]) } ## ----lin.sim4------------------------------------------------------------ @@ -395,7 +395,7 @@ simulate <- function(runs = 10) { res <- array(NA, dim = c(2, runs, 3)) dimnames(res) <- list(c("norm.predict", "norm.nob"), as.character(1:runs), - c("estimate", "2.5 %","97.5 %")) + c("estimate", "conf.low", "conf.high")) for(run in 1:runs) { data <- create.data(run = run) data <- make.missing(data) @@ -416,8 +416,8 @@ apply(res, c(1, 3), mean, na.rm = TRUE) true <- 1 RB <- rowMeans(res[,, "estimate"]) - true PB <- 100 * abs((rowMeans(res[,, "estimate"]) - true)/ true) -CR <- rowMeans(res[,, "2.5 %"] < true & true < res[,, "97.5 %"]) -AW <- rowMeans(res[,, "97.5 %"] - res[,, "2.5 %"]) +CR <- rowMeans(res[,, "conf.low"] < true & true < res[,, "conf.high"]) +AW <- rowMeans(res[,, "97.5 %"] - res[,, "conf.low"]) RMSE <- sqrt(rowMeans((res[,, "estimate"] - true)^2)) data.frame(RB, PB, CR, AW, RMSE) From 9d65ca4eb08d2a041e3be197a156c6ff347c26d2 Mon Sep 17 00:00:00 2001 From: Vladimir Khodygo Date: Wed, 1 Nov 2023 18:18:29 +0000 Subject: [PATCH 03/11] fix typo introduced in d8e37ec365ec8b81633ae197f693751715ddb73d --- R/fimd.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fimd.R b/R/fimd.R index 7e43c43..a8e34c4 100644 --- a/R/fimd.R +++ b/R/fimd.R @@ -417,7 +417,7 @@ true <- 1 RB <- rowMeans(res[,, "estimate"]) - true PB <- 100 * abs((rowMeans(res[,, "estimate"]) - true)/ true) CR <- rowMeans(res[,, "conf.low"] < true & true < res[,, "conf.high"]) -AW <- rowMeans(res[,, "97.5 %"] - res[,, "conf.low"]) +AW <- rowMeans(res[,, "conf.high"] - res[,, "conf.low"]) RMSE <- sqrt(rowMeans((res[,, "estimate"] - true)^2)) data.frame(RB, PB, CR, AW, RMSE) From e70e1431563bd1743fc1d0ee126b5251df076142 Mon Sep 17 00:00:00 2001 From: Vladimir Khodygo Date: Wed, 1 Nov 2023 18:19:06 +0000 Subject: [PATCH 04/11] fix out of bounds error --- R/fimd.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fimd.R b/R/fimd.R index a8e34c4..8ce5809 100644 --- a/R/fimd.R +++ b/R/fimd.R @@ -155,7 +155,7 @@ data <- complete(imp) Yobs <- airquality[, "Ozone"] Yimp <- data[, "Ozone"] mi.hist(Yimp, Yobs, - b = seq(-20, 200, 10), type = "continuous", + b = seq(-40, 200, 10), type = "continuous", gray = FALSE, lwd = lwd, obs.lwd = 1.5, mis.lwd = 1.5, imp.lwd = 1.5, obs.col = mdc(4),mis.col = mdc(5), imp.col = "transparent", From 8150665a1fbb5d442bcc176ded3de791733f7c2d Mon Sep 17 00:00:00 2001 From: Vladimir Khodygo Date: Wed, 1 Nov 2023 18:20:01 +0000 Subject: [PATCH 05/11] round numeric data only --- R/fimd.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/fimd.R b/R/fimd.R index 8ce5809..428c8cc 100644 --- a/R/fimd.R +++ b/R/fimd.R @@ -1466,7 +1466,11 @@ imp1 <- mice(data, meth = meth, pred = pred, m = 10, print = FALSE) ## ----compos4------------------------------------------------------------- -round(summary(pool(with(imp1, lm(Y3 ~ Y1 + Y2))))[, 1:2], 2) +rapply(summary(pool(with(imp1, lm(Y3 ~ Y1 + Y2))))[, 1:2], + round, + classes = "numeric", + how = "replace", + digits = 2) ## ----composition, echo=FALSE, fig.width=7, fig.height=4--------------------- meth <- make.method(data) From a8bcf2ab847597c2ee8517828904affc04c1aa7b Mon Sep 17 00:00:00 2001 From: Vladimir Khodygo Date: Wed, 1 Nov 2023 19:23:00 +0000 Subject: [PATCH 06/11] replace abandoned package references --- R/fimd.R | 5 ++--- Rmd/07-multilevel-imputation.Rmd | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/R/fimd.R b/R/fimd.R index 428c8cc..57f231d 100644 --- a/R/fimd.R +++ b/R/fimd.R @@ -1555,7 +1555,7 @@ vis <- c("hgt", "wgt", "hc", "wgt.hc", "gen", "phb", "tv", "reg") expr <- expression((wgt - 40) * (hc - 50)) boys$wgt.hc <- with(boys, eval(expr)) -imp.int2 <- mice(boys, m = 1, maxit = 1, visitSequence = vis, +imp.int2 <- mice(boys, m = 1, maxit = 2, visitSequence = vis, meth = imp.int$meth, pred = imp.int$pred, seed = 23390) @@ -1646,7 +1646,6 @@ pkg <- c("mice", "micemd", "lme4", "tidyr", loaded <- sapply(pkg, require, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE) suppressPackageStartupMessages(library(miceadds, warn.conflicts = FALSE, quietly = TRUE)) -suppressPackageStartupMessages(library(DPpackage, warn.conflicts = FALSE, quietly = TRUE)) suppressPackageStartupMessages(library(mitml, warn.conflicts = FALSE, quietly = TRUE)) @@ -1778,7 +1777,7 @@ grid.arrange(p.pan, p.pmm) ## ----toenail.2l.3, cache=TRUE-------------------------------------------- library(tidyr) -data("toenail", package = "DPpackage") +data("toenail", package = "mice") data <- tidyr::complete(toenail, ID, visit) %>% tidyr::fill(treatment) %>% dplyr::select(-month) diff --git a/Rmd/07-multilevel-imputation.Rmd b/Rmd/07-multilevel-imputation.Rmd index db28bb4..97ae657 100644 --- a/Rmd/07-multilevel-imputation.Rmd +++ b/Rmd/07-multilevel-imputation.Rmd @@ -860,7 +860,7 @@ The `toenail` data were collected in a randomized parallel group trial comparing two treatments for a common toenail infection. A total of 294 patients were seen at seven visits, and severity of infection was dichotomized as “not severe” (0) and “severe” (1). The version of the -data in the `DPpackage` is all numeric and easy to analyze. The +data in the `mice` is all numeric and easy to analyze. The following statements load the data, and expand the data to the full design with $7 \times 294 = 2058$ rows. There are in total 150 missed visits. From 551c15a80616ce77e2aeaf6ee18c840d85eeeb62 Mon Sep 17 00:00:00 2001 From: Vladimir Khodygo Date: Wed, 1 Nov 2023 19:32:57 +0000 Subject: [PATCH 07/11] fix build error caused by glmer --- R/fimd.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/fimd.R b/R/fimd.R index 57f231d..eb39e09 100644 --- a/R/fimd.R +++ b/R/fimd.R @@ -1835,6 +1835,7 @@ print(tp1, newpage = FALSE) ## ----toenail.est, cache=TRUE--------------------------------------------- library(purrr) +library(broom.mixed) mice::complete(imp, "all") %>% purrr::map(lme4::glmer, formula = outcome ~ treatment * visit + (1 | ID), From 472383cb412ad4d70db8ef7d2d3f6c1e8cf04b57 Mon Sep 17 00:00:00 2001 From: Vladimir Khodygo Date: Fri, 3 Nov 2023 15:59:50 +0000 Subject: [PATCH 08/11] fix minor typos in ch 9, 10 --- Rmd/09-measurement.Rmd | 2 +- Rmd/10-selection.Rmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Rmd/09-measurement.Rmd b/Rmd/09-measurement.Rmd index d4e3b54..9f51ba6 100644 --- a/Rmd/09-measurement.Rmd +++ b/Rmd/09-measurement.Rmd @@ -848,7 +848,7 @@ predict too low. This section explains why this happens. (ref:plotexplain) Illustration of the bias of predictive equations. In general, the combined region 2 + 3b will have fewer cases than region -4a. Thiscauses a downward bias in the prevalence estimate. +4a. This causes a downward bias in the prevalence estimate. Figure \@ref(fig:plotexplain) plots the data of Figure \@ref(fig:plotbmi) in a different way. The figure is centered around diff --git a/Rmd/10-selection.Rmd b/Rmd/10-selection.Rmd index ab8e130..adea3c4 100644 --- a/Rmd/10-selection.Rmd +++ b/Rmd/10-selection.Rmd @@ -43,7 +43,7 @@ in the first 28 days, and another 67 children died between the ages of 28 days and 19 years, leaving 959 survivors at the age of 19 years. Intermediate outcome measures from earlier follow-ups were available for 89% of the survivors at age 14 ($n$ = 854), 77% at age 10 ($n$ = -712), 84% at age 9($n$ = 813), 96% at age 5 ($n$ = 927) and 97% at age +712), 84% at age 9 ($n$ = 813), 96% at age 5 ($n$ = 927) and 97% at age 2 ($n$ = 946). To study the effect of drop-out, @HILLE2005 divided the 959 survivors From a44f8df715411d1845729ebeab9f0fdfcbde4f84 Mon Sep 17 00:00:00 2001 From: Vladimir Khodygo Date: Fri, 3 Nov 2023 18:42:41 +0000 Subject: [PATCH 09/11] drop unused & update code, add missing code evaluation, fix typos, ch9 --- R/fimd.R | 85 ++++-------------------------------------- Rmd/09-measurement.Rmd | 8 ++-- 2 files changed, 13 insertions(+), 80 deletions(-) diff --git a/R/fimd.R b/R/fimd.R index eb39e09..f57d55b 100644 --- a/R/fimd.R +++ b/R/fimd.R @@ -2392,14 +2392,8 @@ xyplot(jitter(rrdiast, 10) ~ jitter(rrsyst, 10) | typ, dat <- cbind(data2, dead = 1 - data2$dwa) hazard <- nelsonaalen(dat, survda, dead) -## ----c85nelsoncorrelation, eval=FALSE, echo=FALSE------------------------ -tmp <- data.frame(hazard, t = data2$survda, - logt = log(data2$survda), - SBP = data2$rrsyst, DBP = data2$rrdiast) -round(cor(tmp, use = "pair"), 3) - ## ----c85km, echo=FALSE, fig.height=4------------------------------------- -library(survival) +suppressPackageStartupMessages(library(survival)) fit <- survfit(Surv(survda / 365, 1 - dwa) ~ is.na(rrsyst), data = data2) lwd <- 0.6 @@ -2545,21 +2539,6 @@ text(1:4, x = c(40, 28, 20, 32), y = c(4, 4, -4, -4), cex = 3) box(lwd = lwd) } -## ----walkinginit, echo=FALSE--------------------------------------------- -library(mice) -library(foreign) -dataproject <- "Data/wad" -.store <- file.path(dataproject,"R/store/") - -file <- file.path(dataproject, "original/walklink.sav") -original <- read.spss(file=file, to.data.frame=TRUE) -data <- original -names(data) <- c("src","sex","age","sip1","haq8","grs9") -data$src <- factor(data$src, levels=2:3, labels=c("Ergo","Euri")) -data$sip1 <- as.factor(data$sip1) -data$haq8 <- as.ordered(data$haq8) -data$grs9 <- as.ordered(data$grs9) - ## ----walkingimpute1------------------------------------------------------ fA <- c(242, 43, 15, 0, 6) fB <- c(145, 110, 29, 8) @@ -2637,16 +2616,10 @@ micemill <- function(n) { as.numeric(YB[src=="A"]), method="kendall")) tau <<- rbind(tau, ra(cors, s=TRUE)) # global assignment - means <- with(imp, mean(as.numeric(YA[src=="A"]), na.rm=TRUE)) - thetaBA <<- rbind(thetaBA, ra(means, s=TRUE)-1) props <- with(imp, mean(YB[src=="A"]=='0')) thetaAB <<- rbind(thetaAB, ra(props, s=TRUE)) - tabs <- with(imp, ftable(addmargins( - table(YA[src=="A"],YB[src=="A"], - useNA="ifany", dnn=c("YA","YB"))))) } } -thetaBA <- NULL ## ----walkingimpute10, cache = TRUE--------------------------------------- tau <- NULL @@ -2861,6 +2834,7 @@ imp <- mice(fdd, pred = fdd.pred, meth = meth, maxit = 20, ## ----fddreshape---------------------------------------------------------- lowi <- mice::complete(imp, "long", inc=TRUE) +lowi <- lowi[, c(ncol(lowi), ncol(lowi) - 1, 1:(ncol(lowi) - 2))] lowi <- data.frame(lowi,cbcl2=NA, cbin2=NA,cbex2=NA) lolo <- reshape(lowi, idvar = 'id', varying = 11:ncol(lowi), @@ -2924,52 +2898,6 @@ tp <- xyplot(yc~time|trt, data=means, type="o", pch=19, xlab="Time", xlim=c("T1","T2","T3")) print(tp) -## ----tbcinit, echo=FALSE------------------------------------------------- -load("data/tbc/imp.1745") -load("data/tbc/fit.hgt") -load("data/tbc/fit.wgt") -load("data/tbc/fit.bmi") - -original <- read_spss(file = "data/tbc/long_spss2splus.sav") -names(original) <- c("id", "age", "occ", "nocc", "sex", - "hgt", "wgt", "bmi", - "hgt.z", "wgt.z", "bmi.z", "wfh.z", "sys140") -broad <- read_spss(file = "data/tbc/broad_spss2splus.sav") -broad <- as_factor(broad) -names(broad) <- tolower(names(broad)) -target <- data.frame(id=broad$koppel, ao = broad$ovjv, bmi.z.jv = broad$b_sdjv) - -# merge the target variables -original <- merge(original, target, all.x = TRUE) - -### define subsets -subset <- !is.na(original$age) & !(is.na(original$hgt.z) & is.na(original$wgt.z) & is.na(original$bmi.z)) -tbc <- original[subset, c("id","occ","nocc","age","sex", - "hgt.z","wgt.z","bmi.z","ao")] -tbc <- tbc[order(tbc$id, tbc$age),] - -### define useful administrative variables -ord <- tbc$id -first <- diff(ord)>=1 -typ <- factor(rep("obs",nrow(tbc)),levels=c("obs","sup","pred")) -tbc <- data.frame(tbc[,1:3],first=c(TRUE,first),typ,tbc[4:ncol(tbc)]) - -### make small dataset -six <- c(1259,7019,2447,7460,8046,7646) -set.seed(23221) -sample <- unique(c(six,sample(unique(tbc$id),300))) -idx <- (tbc$id) %in% sample -data <- tbc[idx,] - -### select all data -data <- tbc - -### remove those with nocc 1 or 2 or 3 -data <- data[data$nocc >= 3,] - -### missing data heat map -tbc <- data - ## ----tbcspline----------------------------------------------------------- library(splines) data <- tbc @@ -3047,15 +2975,17 @@ imp.1745 <- mice(data2, meth = meth, pred = pred, m = 10, maxit = 10, seed = 52711, print = FALSE) ## ----tbcplotimp, echo=FALSE--------------------------------------------- +imp.1745 <- mice(data2, meth = meth, pred = pred, m = 10, + maxit = 10, seed = 52711, print = FALSE) cd <- mice::complete(imp.1745, "long") sup <- cd[cd$typ=="sup", ] # sup <- cd[cd$typ=="imp",-c(2,10:12)] # sup$ao <- NA data3 <- data.frame(.imp=0,data2) -data3 <- rbind(data3, sup[,-2]) +data3 <- rbind(data3, sup[, -ncol(sup)]) # prepare for plotting -idx <- data3$id %in% six +idx <- data3$id %in% c(1259,7019,2447,7460,8046,7646) pd <- data3[idx,] pd$id <- as.factor(pd$id) pd$grp <- pd$.imp @@ -3087,7 +3017,8 @@ print(tbcimp) ## ----tbchw, eval=TRUE, echo=FALSE--------------------------------------- ### prepare for plotting cd <- mice::complete(imp.1745, 1) -idx <- (cd$id) %in% sample +set.seed(23221) +idx <- (cd$id) %in% unique(c(six,sample(unique(tbc$id),300))) cd <- cd[idx,] shingle <- cut(cd$age,breaks=c(brk,29.01),right=FALSE, inc=TRUE, diff --git a/Rmd/09-measurement.Rmd b/Rmd/09-measurement.Rmd index 9f51ba6..d37d95e 100644 --- a/Rmd/09-measurement.Rmd +++ b/Rmd/09-measurement.Rmd @@ -459,17 +459,17 @@ to add log($T$) as an additional predictor. This strong relation may have been a consequence of the design, as the frail people were measured first. - $H_0(T)$ $T$ log($T$) SBP DBP + $H_0(T)$ $T$ $\log(T)$ SBP DBP ---------- ---------- ------- ---------- ------- ------- $H_0(T)$ 1.000 0.997 0.830 0.169 0.137 $T$ 0.997 1.000 0.862 0.176 0.141 - log($T$) 0.830 0.862 1.000 0.205 0.151 + $\log(T)$ 0.830 0.862 1.000 0.205 0.151 SBP 0.169 0.176 0.205 1.000 0.592 DBP 0.137 0.141 0.151 0.592 1.000 ---------- ---------- ------- ---------- ------- ------- : (\#tab:c85hazardcor) Pearson correlations between the cumulative -death hazard $H_0(T)$, survival time $T$, log($T$), systolic and +death hazard $H_0(T)$, survival time $T$, $\log(T)$, systolic and diastolic blood pressure. ### Some guidance @@ -1399,6 +1399,8 @@ first 20 iterations (where we impute $Y_\mathrm{A}$ from $Y_\mathrm{B}$ and vice versa) we add age and sex as covariates, and do another 20 iterations. This goes as follows: +```{r walkingimpute9, cache = TRUE, echo = FALSE} +``` ```{r walkingimpute10, cache = TRUE} ``` From aad28fe8646921560bd77638f2a0e20b1e06dcfc Mon Sep 17 00:00:00 2001 From: Vladimir Khodygo Date: Fri, 3 Nov 2023 18:44:01 +0000 Subject: [PATCH 10/11] drop redundant code evaluation, ch11 --- Rmd/11-longitudinal.Rmd | 2 -- 1 file changed, 2 deletions(-) diff --git a/Rmd/11-longitudinal.Rmd b/Rmd/11-longitudinal.Rmd index 2403acb..fb104e6 100644 --- a/Rmd/11-longitudinal.Rmd +++ b/Rmd/11-longitudinal.Rmd @@ -586,8 +586,6 @@ according to the broken stick model. The multiply imputed values are then used to estimate difference scores and regression models that throw light on the question of scientific interest. -```{r tbcinit, echo = FALSE} -``` ### Broken stick model$^\spadesuit$ {#sec:brokenstick} From c0573c6819aeea6c6718adfba2cb9ead4d456622 Mon Sep 17 00:00:00 2001 From: Vladimir Khodygo Date: Fri, 3 Nov 2023 19:17:21 +0000 Subject: [PATCH 11/11] correct equation alignment in ch3 --- Rmd/03-univariate-missing.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Rmd/03-univariate-missing.Rmd b/Rmd/03-univariate-missing.Rmd index f01e600..3074064 100644 --- a/Rmd/03-univariate-missing.Rmd +++ b/Rmd/03-univariate-missing.Rmd @@ -1726,8 +1726,8 @@ The pattern-mixture model [@GLYNN1986B; @LITTLE1993] decomposes the joint distribution $P(Y,R)$ as \begin{align} - P(Y,R)&=& P(Y|R)P(R) (\#eq:pmmod1)\\ - &=& P(Y|R=1)P(R=1) + P(Y|R=0)P(R=0) (\#eq:pmmod2) + P(Y,R)&= P(Y|R)P(R) (\#eq:pmmod1)\\ + &= P(Y|R=1)P(R=1) + P(Y|R=0)P(R=0) (\#eq:pmmod2) \end{align} Compared to Equation \@ref(eq:selection) this model only reverses the