Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add general corrections, add fixes to be able to compile the book #10

Open
wants to merge 11 commits into
base: corrections
Choose a base branch
from
107 changes: 21 additions & 86 deletions R/fimd.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ data <- complete(imp)
Yobs <- airquality[, "Ozone"]
Yimp <- data[, "Ozone"]
mi.hist(Yimp, Yobs,
b = seq(-20, 200, 10), type = "continuous",
b = seq(-40, 200, 10), type = "continuous",
gray = FALSE, lwd = lwd,
obs.lwd = 1.5, mis.lwd = 1.5, imp.lwd = 1.5,
obs.col = mdc(4),mis.col = mdc(5), imp.col = "transparent",
Expand Down Expand Up @@ -387,15 +387,15 @@ test.impute <- function(data, m = 5, method = "norm", ...) {
imp <- mice(data, method = method, m = m, print = FALSE, ...)
fit <- with(imp, lm(y ~ x))
tab <- summary(pool(fit), "all", conf.int = TRUE)
as.numeric(tab["x", c("estimate", "2.5 %", "97.5 %")])
as.numeric(tab[2, c("estimate", "conf.low", "conf.high")])
}

## ----lin.sim4------------------------------------------------------------
simulate <- function(runs = 10) {
res <- array(NA, dim = c(2, runs, 3))
dimnames(res) <- list(c("norm.predict", "norm.nob"),
as.character(1:runs),
c("estimate", "2.5 %","97.5 %"))
c("estimate", "conf.low", "conf.high"))
for(run in 1:runs) {
data <- create.data(run = run)
data <- make.missing(data)
Expand All @@ -416,8 +416,8 @@ apply(res, c(1, 3), mean, na.rm = TRUE)
true <- 1
RB <- rowMeans(res[,, "estimate"]) - true
PB <- 100 * abs((rowMeans(res[,, "estimate"]) - true)/ true)
CR <- rowMeans(res[,, "2.5 %"] < true & true < res[,, "97.5 %"])
AW <- rowMeans(res[,, "97.5 %"] - res[,, "2.5 %"])
CR <- rowMeans(res[,, "conf.low"] < true & true < res[,, "conf.high"])
AW <- rowMeans(res[,, "conf.high"] - res[,, "conf.low"])
RMSE <- sqrt(rowMeans((res[,, "estimate"] - true)^2))
data.frame(RB, PB, CR, AW, RMSE)

Expand Down Expand Up @@ -1466,7 +1466,11 @@ imp1 <- mice(data, meth = meth, pred = pred, m = 10,
print = FALSE)

## ----compos4-------------------------------------------------------------
round(summary(pool(with(imp1, lm(Y3 ~ Y1 + Y2))))[, 1:2], 2)
rapply(summary(pool(with(imp1, lm(Y3 ~ Y1 + Y2))))[, 1:2],
round,
classes = "numeric",
how = "replace",
digits = 2)

## ----composition, echo=FALSE, fig.width=7, fig.height=4---------------------
meth <- make.method(data)
Expand Down Expand Up @@ -1551,7 +1555,7 @@ vis <- c("hgt", "wgt", "hc", "wgt.hc", "gen", "phb",
"tv", "reg")
expr <- expression((wgt - 40) * (hc - 50))
boys$wgt.hc <- with(boys, eval(expr))
imp.int2 <- mice(boys, m = 1, maxit = 1, visitSequence = vis,
imp.int2 <- mice(boys, m = 1, maxit = 2, visitSequence = vis,
meth = imp.int$meth, pred = imp.int$pred,
seed = 23390)

Expand Down Expand Up @@ -1642,7 +1646,6 @@ pkg <- c("mice", "micemd", "lme4", "tidyr",
loaded <- sapply(pkg, require, character.only = TRUE,
warn.conflicts = FALSE, quietly = TRUE)
suppressPackageStartupMessages(library(miceadds, warn.conflicts = FALSE, quietly = TRUE))
suppressPackageStartupMessages(library(DPpackage, warn.conflicts = FALSE, quietly = TRUE))
suppressPackageStartupMessages(library(mitml, warn.conflicts = FALSE, quietly = TRUE))


Expand Down Expand Up @@ -1774,7 +1777,7 @@ grid.arrange(p.pan, p.pmm)

## ----toenail.2l.3, cache=TRUE--------------------------------------------
library(tidyr)
data("toenail", package = "DPpackage")
data("toenail", package = "mice")
data <- tidyr::complete(toenail, ID, visit) %>%
tidyr::fill(treatment) %>%
dplyr::select(-month)
Expand Down Expand Up @@ -1832,6 +1835,7 @@ print(tp1, newpage = FALSE)

## ----toenail.est, cache=TRUE---------------------------------------------
library(purrr)
library(broom.mixed)
mice::complete(imp, "all") %>%
purrr::map(lme4::glmer,
formula = outcome ~ treatment * visit + (1 | ID),
Expand Down Expand Up @@ -2388,14 +2392,8 @@ xyplot(jitter(rrdiast, 10) ~ jitter(rrsyst, 10) | typ,
dat <- cbind(data2, dead = 1 - data2$dwa)
hazard <- nelsonaalen(dat, survda, dead)

## ----c85nelsoncorrelation, eval=FALSE, echo=FALSE------------------------
tmp <- data.frame(hazard, t = data2$survda,
logt = log(data2$survda),
SBP = data2$rrsyst, DBP = data2$rrdiast)
round(cor(tmp, use = "pair"), 3)

## ----c85km, echo=FALSE, fig.height=4-------------------------------------
library(survival)
suppressPackageStartupMessages(library(survival))
fit <- survfit(Surv(survda / 365, 1 - dwa) ~ is.na(rrsyst),
data = data2)
lwd <- 0.6
Expand Down Expand Up @@ -2541,21 +2539,6 @@ text(1:4, x = c(40, 28, 20, 32), y = c(4, 4, -4, -4), cex = 3)
box(lwd = lwd)
}

## ----walkinginit, echo=FALSE---------------------------------------------
library(mice)
library(foreign)
dataproject <- "Data/wad"
.store <- file.path(dataproject,"R/store/")

file <- file.path(dataproject, "original/walklink.sav")
original <- read.spss(file=file, to.data.frame=TRUE)
data <- original
names(data) <- c("src","sex","age","sip1","haq8","grs9")
data$src <- factor(data$src, levels=2:3, labels=c("Ergo","Euri"))
data$sip1 <- as.factor(data$sip1)
data$haq8 <- as.ordered(data$haq8)
data$grs9 <- as.ordered(data$grs9)

## ----walkingimpute1------------------------------------------------------
fA <- c(242, 43, 15, 0, 6)
fB <- c(145, 110, 29, 8)
Expand Down Expand Up @@ -2633,16 +2616,10 @@ micemill <- function(n) {
as.numeric(YB[src=="A"]),
method="kendall"))
tau <<- rbind(tau, ra(cors, s=TRUE)) # global assignment
means <- with(imp, mean(as.numeric(YA[src=="A"]), na.rm=TRUE))
thetaBA <<- rbind(thetaBA, ra(means, s=TRUE)-1)
props <- with(imp, mean(YB[src=="A"]=='0'))
thetaAB <<- rbind(thetaAB, ra(props, s=TRUE))
tabs <- with(imp, ftable(addmargins(
table(YA[src=="A"],YB[src=="A"],
useNA="ifany", dnn=c("YA","YB")))))
}
}
thetaBA <- NULL

## ----walkingimpute10, cache = TRUE---------------------------------------
tau <- NULL
Expand Down Expand Up @@ -2857,6 +2834,7 @@ imp <- mice(fdd, pred = fdd.pred, meth = meth, maxit = 20,

## ----fddreshape----------------------------------------------------------
lowi <- mice::complete(imp, "long", inc=TRUE)
lowi <- lowi[, c(ncol(lowi), ncol(lowi) - 1, 1:(ncol(lowi) - 2))]
lowi <- data.frame(lowi,cbcl2=NA, cbin2=NA,cbex2=NA)
lolo <- reshape(lowi, idvar = 'id',
varying = 11:ncol(lowi),
Expand Down Expand Up @@ -2920,52 +2898,6 @@ tp <- xyplot(yc~time|trt, data=means, type="o", pch=19,
xlab="Time", xlim=c("T1","T2","T3"))
print(tp)

## ----tbcinit, echo=FALSE-------------------------------------------------
load("data/tbc/imp.1745")
load("data/tbc/fit.hgt")
load("data/tbc/fit.wgt")
load("data/tbc/fit.bmi")

original <- read_spss(file = "data/tbc/long_spss2splus.sav")
names(original) <- c("id", "age", "occ", "nocc", "sex",
"hgt", "wgt", "bmi",
"hgt.z", "wgt.z", "bmi.z", "wfh.z", "sys140")
broad <- read_spss(file = "data/tbc/broad_spss2splus.sav")
broad <- as_factor(broad)
names(broad) <- tolower(names(broad))
target <- data.frame(id=broad$koppel, ao = broad$ovjv, bmi.z.jv = broad$b_sdjv)

# merge the target variables
original <- merge(original, target, all.x = TRUE)

### define subsets
subset <- !is.na(original$age) & !(is.na(original$hgt.z) & is.na(original$wgt.z) & is.na(original$bmi.z))
tbc <- original[subset, c("id","occ","nocc","age","sex",
"hgt.z","wgt.z","bmi.z","ao")]
tbc <- tbc[order(tbc$id, tbc$age),]

### define useful administrative variables
ord <- tbc$id
first <- diff(ord)>=1
typ <- factor(rep("obs",nrow(tbc)),levels=c("obs","sup","pred"))
tbc <- data.frame(tbc[,1:3],first=c(TRUE,first),typ,tbc[4:ncol(tbc)])

### make small dataset
six <- c(1259,7019,2447,7460,8046,7646)
set.seed(23221)
sample <- unique(c(six,sample(unique(tbc$id),300)))
idx <- (tbc$id) %in% sample
data <- tbc[idx,]

### select all data
data <- tbc

### remove those with nocc 1 or 2 or 3
data <- data[data$nocc >= 3,]

### missing data heat map
tbc <- data

## ----tbcspline-----------------------------------------------------------
library(splines)
data <- tbc
Expand Down Expand Up @@ -3043,15 +2975,17 @@ imp.1745 <- mice(data2, meth = meth, pred = pred, m = 10,
maxit = 10, seed = 52711, print = FALSE)

## ----tbcplotimp, echo=FALSE---------------------------------------------
imp.1745 <- mice(data2, meth = meth, pred = pred, m = 10,
maxit = 10, seed = 52711, print = FALSE)
cd <- mice::complete(imp.1745, "long")
sup <- cd[cd$typ=="sup", ]
# sup <- cd[cd$typ=="imp",-c(2,10:12)]
# sup$ao <- NA
data3 <- data.frame(.imp=0,data2)
data3 <- rbind(data3, sup[,-2])
data3 <- rbind(data3, sup[, -ncol(sup)])

# prepare for plotting
idx <- data3$id %in% six
idx <- data3$id %in% c(1259,7019,2447,7460,8046,7646)
pd <- data3[idx,]
pd$id <- as.factor(pd$id)
pd$grp <- pd$.imp
Expand Down Expand Up @@ -3083,7 +3017,8 @@ print(tbcimp)
## ----tbchw, eval=TRUE, echo=FALSE---------------------------------------
### prepare for plotting
cd <- mice::complete(imp.1745, 1)
idx <- (cd$id) %in% sample
set.seed(23221)
idx <- (cd$id) %in% unique(c(six,sample(unique(tbc$id),300)))
cd <- cd[idx,]

shingle <- cut(cd$age,breaks=c(brk,29.01),right=FALSE, inc=TRUE,
Expand Down
6 changes: 3 additions & 3 deletions Rmd/03-univariate-missing.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -1198,7 +1198,7 @@ uncertainty that needs to be incorporated into the imputations.
5. Calculate $\dot\beta = \hat\beta + \dot z_1 V^{1/2}$.

6. Calculate $n_0$ predicted probabilities
$\dot p = 1 / (1+ \exp(-\Xmis\dot\beta))$.
$\dot p = 1 / (1+ \exp(-X_\mathrm{mis}\dot\beta))$.

7. Draw $n_0$ random variates from the uniform distribution
$U(0,1)$ in the vector $u$.
Expand Down Expand Up @@ -1726,8 +1726,8 @@ The pattern-mixture model [@GLYNN1986B; @LITTLE1993] decomposes the
joint distribution $P(Y,R)$ as

\begin{align}
P(Y,R)&=& P(Y|R)P(R) (\#eq:pmmod1)\\
&=& P(Y|R=1)P(R=1) + P(Y|R=0)P(R=0) (\#eq:pmmod2)
P(Y,R)&= P(Y|R)P(R) (\#eq:pmmod1)\\
&= P(Y|R=1)P(R=1) + P(Y|R=0)P(R=0) (\#eq:pmmod2)
\end{align}

Compared to Equation \@ref(eq:selection) this model only reverses the
Expand Down
2 changes: 1 addition & 1 deletion Rmd/07-multilevel-imputation.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,7 @@ The `toenail` data were collected in a randomized parallel group trial
comparing two treatments for a common toenail infection. A total of
294 patients were seen at seven visits, and severity of infection was
dichotomized as “not severe” (0) and “severe” (1). The version of the
data in the `DPpackage` is all numeric and easy to analyze. The
data in the `mice` is all numeric and easy to analyze. The
following statements load the data, and expand the data to the full
design with $7 \times 294 = 2058$ rows. There are in total 150 missed
visits.
Expand Down
10 changes: 6 additions & 4 deletions Rmd/09-measurement.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -459,17 +459,17 @@ to add log($T$) as an additional predictor. This strong relation may
have been a consequence of the design, as the frail people were
measured first.

$H_0(T)$ $T$ log($T$) SBP DBP
$H_0(T)$ $T$ $\log(T)$ SBP DBP
---------- ---------- ------- ---------- ------- -------
$H_0(T)$ 1.000 0.997 0.830 0.169 0.137
$T$ 0.997 1.000 0.862 0.176 0.141
log($T$) 0.830 0.862 1.000 0.205 0.151
$\log(T)$ 0.830 0.862 1.000 0.205 0.151
SBP 0.169 0.176 0.205 1.000 0.592
DBP 0.137 0.141 0.151 0.592 1.000
---------- ---------- ------- ---------- ------- -------

: (\#tab:c85hazardcor) Pearson correlations between the cumulative
death hazard $H_0(T)$, survival time $T$, log($T$), systolic and
death hazard $H_0(T)$, survival time $T$, $\log(T)$, systolic and
diastolic blood pressure.

### Some guidance
Expand Down Expand Up @@ -848,7 +848,7 @@ predict too low. This section explains why this happens.

(ref:plotexplain) Illustration of the bias of predictive equations. In
general, the combined region 2 + 3b will have fewer cases than region
4a. Thiscauses a downward bias in the prevalence estimate.
4a. This causes a downward bias in the prevalence estimate.

Figure \@ref(fig:plotexplain) plots the data of Figure
\@ref(fig:plotbmi) in a different way. The figure is centered around
Expand Down Expand Up @@ -1399,6 +1399,8 @@ first 20 iterations (where we impute $Y_\mathrm{A}$ from
$Y_\mathrm{B}$ and vice versa) we add age and sex as covariates, and
do another 20 iterations. This goes as follows:

```{r walkingimpute9, cache = TRUE, echo = FALSE}
```
```{r walkingimpute10, cache = TRUE}
```

Expand Down
2 changes: 1 addition & 1 deletion Rmd/10-selection.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ in the first 28 days, and another 67 children died between the ages of
28 days and 19 years, leaving 959 survivors at the age of 19 years.
Intermediate outcome measures from earlier follow-ups were available
for 89% of the survivors at age 14 ($n$ = 854), 77% at age 10 ($n$ =
712), 84% at age 9($n$ = 813), 96% at age 5 ($n$ = 927) and 97% at age
712), 84% at age 9 ($n$ = 813), 96% at age 5 ($n$ = 927) and 97% at age
2 ($n$ = 946).

To study the effect of drop-out, @HILLE2005 divided the 959 survivors
Expand Down
2 changes: 0 additions & 2 deletions Rmd/11-longitudinal.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -586,8 +586,6 @@ according to the broken stick model. The multiply imputed values are
then used to estimate difference scores and regression models that
throw light on the question of scientific interest.

```{r tbcinit, echo = FALSE}
```

### Broken stick model$^\spadesuit$ {#sec:brokenstick}

Expand Down