% lec04.rnw
\documentclass[table,10pt]{beamer}
\mode<presentation>{
%\usetheme{Goettingen}
\usetheme{Boadilla}
\usecolortheme{default}
}
\usepackage{CJK}
\usepackage{graphicx}
\usepackage{amsmath, amsopn}
\usepackage{xcolor}
\usepackage[english]{babel}
\usepackage[T1]{fontenc}
\usepackage[latin1]{inputenc}
\usepackage{enumerate}
\usepackage{multirow}
\usepackage{url}
\ifx\hypersetup\undefined
\AtBeginDocument{%
\hypersetup{unicode=true,pdfusetitle,
bookmarks=true,bookmarksnumbered=false,bookmarksopen=false,
breaklinks=false,pdfborder={0 0 0},pdfborderstyle={},backref=false,colorlinks=false}
}
\else
\hypersetup{unicode=true,pdfusetitle,
bookmarks=true,bookmarksnumbered=false,bookmarksopen=false,
breaklinks=false,pdfborder={0 0 0},pdfborderstyle={},backref=false,colorlinks=false}
\fi
\usepackage{breakurl}
\usepackage{color}
\usepackage{times}
\usepackage{xcolor}
\usepackage{listings}
\lstset{
language=R,
keywordstyle=\color{blue!70}\bfseries,
basicstyle=\ttfamily,
commentstyle=\ttfamily,
showspaces=false,
showtabs=false,
frame=shadowbox,
rulesepcolor=\color{red!20!green!20!blue!20},
breaklines=true}
\setlength{\parskip}{.5em}
\title[BI476]{BI476: Biostatistics - Case Studies}
\subtitle[anova]{Lec04: Clinical Trial Data Analysis}
\author[Maoying Wu]{Maoying Wu\\{\small [email protected]}}
\institute[CBB] % (optional, but mostly needed)
{
\inst{}
Dept. of Bioinformatics \& Biostatistics\\
Shanghai Jiao Tong University
}
\date{Spring, 2018}
\AtBeginSection[]
{
\begin{frame}<beamer>{Next Section ...}
\tableofcontents[currentsection]
\end{frame}
}
\begin{document}
\begin{CJK*}{UTF8}{gbsn}
<<setup, include=FALSE>>=
# knitr is loaded so that opts_chunk is available
library(knitr)
# defaults for the chunks below: figure path prefix, figure alignment,
# fig.show mode, code font size and figure width
opts_chunk$set(fig.path='figure/beamer-', fig.align='center',
fig.show='hold',size='footnotesize', out.width='0.60\\textwidth')
@
\begin{frame}
\titlepage
\end{frame}
\begin{frame}
\frametitle{Outline}
\tableofcontents
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{A Clinical Trial on Diastolic Blood Pressure (DBP)}
Here we present a data set of diastolic blood pressure measured in
small clinical trials in hypertension from the mid-to-late 1960s and
for approximately a decade thereafter. Diastolic blood pressure (DBP)
was measured (mmHg) in the supine position at baseline (i.e., ``DBP1'')
before randomization and monthly thereafter up to 4 months as indicated
by \texttt{DBP2}, \texttt{DBP3}, \texttt{DBP4} and \texttt{DBP5}.
Patients' age and sex were recorded at baseline and represent potential
covariates.
The primary objective in the analysis of this dataset is to test
whether treatment A (new drug) may be effective in lowering DBP as
compared to B (placebo) and to describe changes in DBP across the times
at which it was measured.
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Importing the dataset}
<<eval=TRUE>>=
# sum-to-zero contrasts for unordered factors, polynomial for ordered ones
# (the option is "contrasts"; a misspelled name is stored but never used)
options(contrasts=c("contr.sum", "contr.poly"))
# read the trial data; the first line of the file holds the column names
dbp <- read.table("data/dbp.txt", header=TRUE)
# change from baseline: last measurement (DBP5) minus baseline (DBP1)
dbp$diff <- dbp$DBP5 - dbp$DBP1
head(dbp)
# number of patients per treatment
table(dbp$TRT)
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Baseline Information}
<<eval=TRUE>>=
# two panels side by side
par(mfrow=c(1,2))
# baseline DBP (DBP1) by treatment
boxplot(DBP1 ~ TRT, data=dbp)
# Sex counts, one bar per treatment
barplot(table(dbp$Sex, dbp$TRT))
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Pairwise plot}
<<eval=TRUE>>=
# scatterplot matrix of all columns of dbp
pairs(dbp)
@
\end{frame}
\section{Parametric tests}
\begin{frame}[t]
\frametitle{Parametric tests}
\begin{itemize}
\item 2-groups: t-test for continuous outcome in completely randomized
parallel design.
\item 2-groups: Paired t-test for continuous outcome in crossover design.
\item 3+-groups: One-way ANOVA for continuous outcome in completely
randomized parallel design.
\item 2+-groups: Two-way ANOVA for continuous outcome in factorial design.
\item 3+-groups: One-way repeated-measures ANOVA for continuous outcome
in randomized block design.
\item 2-groups: Chi-square test or Fisher's exact test for binary outcome in
completely randomized parallel design.
\item 2-groups: McNemar's test for binary outcome in crossover design.
\item 3+-groups: Cochran's Q-test for binary outcome in crossover design.
\end{itemize}
\end{frame}
\subsection{t-test}
\begin{frame}[t, containsverbatim]
\frametitle{Student's t-test for parallel design}
\framesubtitle{Comparing two treatment group means with equal variances}
\begin{itemize}
\item \textbf{Assumption}: $Y_1$ and $Y_2$ are independent
and normally distributed with common variance
$\sigma^2$.
\item \textbf{Design}: Randomized parallel design
\item \textbf{Hypothesis}: $H_0: \mu_1 = \mu_2$ vs. $H_1: \mu_1 \neq \mu_2$
\item \textbf{Process}
\begin{itemize}
\item Compute the test statistic:
$$t = \frac{\bar{y}_1 - \bar{y}_2}{s\sqrt{1/n_1 + 1/n_2}},$$
where
$$
\bar{y}_i = \sum_{j=1}^{n_i} y_{ij}/n_i, \quad s = \sqrt{\frac{(n_1-1)s_1^2 + (n_2-1)s_2^2}{n_1+n_2-2}}, \quad
s_i^2 = \sum_{j=1}^{n_i} (y_{ij} - \bar{y}_i)^2/(n_i - 1).$$
\item Reject $H_0$ if $|t| > t_{\alpha/2,n_1+n_2-2}$
\end{itemize}
\end{itemize}
\begin{lstlisting}
t.test(..., var.equal=TRUE)
\end{lstlisting}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{$t$-test with equal variances}
<<eval=TRUE>>=
# two-sample t-test of diff between treatments, pooled variance
t.test(diff ~ TRT, data=dbp, var.equal=TRUE)
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Welch's t-test for parallel design}
\framesubtitle{Comparing two treatment group means with unequal variances}
\begin{enumerate}
\item Compute the $t$ statistic
$$
T = \frac{\bar{y}_1 - \bar{y}_2}{\sqrt{s_1^2/n_1 + s_2^2/n_2}}
$$
\item The degrees of freedom
$$
\nu = \left[ \frac{c^2}{n_1-1} + \frac{(1-c)^2}{n_2-1}\right]^{-1}
$$
where
$$
c = \frac{s_1^2/n_1}{s_1^2/n_1 + s_2^2/n_2}
$$
\item Reject $H_0$ if $|T| > t_{\alpha/2, \nu}$
\end{enumerate}
\begin{lstlisting}
t.test(..., var.equal=FALSE)
\end{lstlisting}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{$t$-test with unequal variances}
<<eval=TRUE>>=
# two-sample t-test of diff between treatments, variances not pooled
t.test(diff ~ TRT, data=dbp, var.equal=FALSE)
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Wait...Are the two variances equal?}
<<eval=TRUE>>=
# F test comparing the variance of diff between the two treatments
var.test(diff ~ TRT, data=dbp)
@
\end{frame}
\begin{frame}[t, containsverbatim]
\frametitle{One-sided t-test}
Since ``B'' is a placebo, the one-sided t-test may be more appropriate
to test the treatment effect:
<<eval=TRUE>>=
# change from baseline for each treatment
diff.A <- dbp[dbp$TRT == "A", "diff"]
diff.B <- dbp[dbp$TRT == "B", "diff"]
# one-sided test: is the mean of diff.A less than that of diff.B?
t.test(x = diff.A, y = diff.B, alternative = "less")
@
\end{frame}
\subsection{ANOVA}
\begin{frame}[t]
\frametitle{One-way ANOVA}
The single factor $A$ has $k$ levels: $A_1, A_2, \dots, A_k$, and $n$ patients are allocated to
each treatment group. We can obtain the samples:
$$
y_{i1}, \dots, y_{in}, i=1, \dots, k
$$
\uncover<2->{\begin{block}{\center Fundamental statistics}
\footnotesize
\begin{itemize}
\item<3-> Grand mean: $\bar{y}_{*} = \frac{1}{kn}\sum_{i=1}^k \sum_{j=1}^n y_{ij}$
\item<3-> Marginal mean: $\bar{y}_i = \frac{1}{n}\sum_{j=1}^n y_{ij}$;
\item<4-> Total sum of squares (SST): $\mathrm{SS}_T = \sum_{i=1}^k \sum_{j=1}^n (y_{ij} - \bar{y}_*)^2$;
\item<4-> Between-group sum of squares (SSB): $\mathrm{SS}_B = n\sum_{i=1}^k (\bar{y}_i - \bar{y}_*)^2$;
\item<4-> Within-group sum of squares (SSE): $\mathrm{SS}_E = \sum_{i=1}^k \sum_{j=1}^n (y_{ij} - \bar{y}_i)^2$;
\end{itemize}
\end{block}
}
\end{frame}
\begin{frame}[t]
\frametitle{One-way ANOVA Table}
\begin{table}
\caption{One-way ANOVA Table with $k$ groups and $n$ subjects per group}
\begin{tabular}{lcccc}
\hline
Variance & $\textrm{SS}$ & $\textrm{DF}$ & $\textrm{MS}$ & $F$-value\\
\hline
Between-groups & $\textrm{SS}_b$ & $k-1$ & $\textrm{MS}_b$ & $F = \textrm{MS}_b/\textrm{MS}_w$\\
Within-groups & $\textrm{SS}_w$ & $k(n-1)$ & $\textrm{MS}_w$ & \\
Total & $\textrm{SS}_T$ & $kn-1$ & $\textrm{MS}_T$ & \\
\hline
\end{tabular}
\end{table}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{One-way ANOVA for Time Changes}
In the DBP trial, DBP was measured at baseline and at months 1,
2, 3 and 4 post baseline. To see the mean changes over the periods:
<<eval=TRUE>>=
# per-treatment means of columns 3 to 7 (presumably DBP1..DBP5)
aggregate(dbp[,3:7], list(TRT=dbp$TRT), mean)
@
Now we can employ the one-way ANOVA to test the change over time. But the first thing is to ``reshape'' the data:
<<eval=TRUE>>=
# wide -> long: stack DBP1..DBP5 into a single column, one row per visit
Dbp <- reshape(dbp,
               direction = "long",
               varying = paste0("DBP", 1:5),
               sep = "",
               idvar = c("Subject", "TRT", "Age", "Sex", "diff"))
names(Dbp) <- c("Subject", "TRT", "Age", "Sex", "diff", "Time", "DBP")
row.names(Dbp) <- NULL
# treat the visit index as a categorical factor
Dbp$Time <- as.factor(Dbp$Time)
head(Dbp, n = 3)
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{One-way ANOVA for Two Different Treatments}
<<eval=TRUE>>=
# long-format rows of treatment A only
dbpA <- Dbp[Dbp$TRT == "A", ]
# one-way ANOVA of DBP across Time within treatment A
test.A <- aov(DBP ~ Time, data = dbpA)
summary(test.A)
@
<<eval=TRUE>>=
# long-format rows of treatment B only
dbpB <- Dbp[Dbp$TRT == "B", ]
# one-way ANOVA of DBP across Time within treatment B
test.B <- aov(DBP ~ Time, data = dbpB)
summary(test.B)
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Post-hoc Tests for ANOVA of treatment A}
<<eval=TRUE>>=
# Tukey HSD pairwise comparisons of Time levels for the treatment A fit
TukeyHSD(test.A)
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Post-hoc Tests for ANOVA of treatment B}
<<eval=TRUE>>=
# Tukey HSD pairwise comparisons of Time levels for the treatment B fit
TukeyHSD(test.B)
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Two-way ANOVA With Interaction}
The DBP trial has two factors: treatment and Time. Under this
situation, one-way ANOVA (within treatment groups across Time)
cannot capture the interaction between these two factors.
Therefore, a two-way or multi-way ANOVA is needed to analyze the
interaction before making statistical inferences about the main
effects.
<<eval=TRUE>>=
# two-way ANOVA: main effects of TRT and Time plus their interaction
mod <- aov(DBP ~ TRT * Time, data = Dbp)
summary(mod)
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Plot Interaction Between Time and Treatment}
<<eval=TRUE>>=
# two stacked panels with reduced margins
par(mfrow = c(2, 1), mar = c(5, 3, 1, 1))
# mean DBP against Time, one trace per treatment
with(Dbp, interaction.plot(Time, TRT, DBP, las = 1, legend = TRUE))
# mean DBP against treatment, one trace per Time level
with(Dbp, interaction.plot(TRT, Time, DBP, las = 1, legend = TRUE))
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Post-Hoc Analysis}
<<eval=TRUE>>=
# Tukey HSD comparisons for TRT, Time and TRT:Time from the two-way fit
TukeyHSD(aov(DBP ~ TRT*Time, Dbp))
@
\end{frame}
\subsection{ANCOVA}
\begin{frame}[t]
\frametitle{What is Analysis of Covariance (ANCOVA)?}
\begin{itemize}[<+->]
\item One-way/two-way/Multi-way ANOVA with uncontrollable but measurable
independent variable $X$;
\item The outcome $Y$ is continuous;
\item $Y$ and $X$ (covariate) have a linear relationship;
\item Regression analysis is used to adjust for the effect of $X$ on $Y$.
\item Here we do not control for $X$ in the trials, but conduct a post-hoc
analysis.
\end{itemize}
\uncover<6->{
\begin{alertblock}{\center Example}
\begin{itemize}
\item Initial blood pressure ($X$) for the BP reductions ($Y$) in comparing different
blood pressure medications ($G$).
\item Parallel-group clinical trials with pre-treatment baselines.
\end{itemize}
\end{alertblock}
}
\end{frame}
\begin{frame}[t]
\frametitle{One-way ANCOVA}
$$
(x_{i1}, y_{i1}), \dots, (x_{is}, y_{is}), i=1, \dots, r
$$
\uncover<2->{\begin{block}{\center Fundamental statistics}
\footnotesize
\begin{itemize}
\item<3-> $\bar{x}_{i*} = \frac{1}{s}\sum_{j=1}^s x_{ij}; \bar{x}_{**} = \frac{1}{rs}\sum_{i=1}^r \sum_{j=1}^s x_{ij}$
\item<3-> $\bar{y}_{i*} = \frac{1}{s}\sum_{j=1}^s y_{ij}; \bar{y}_{**} = \frac{1}{rs}\sum_{i=1}^r \sum_{j=1}^s y_{ij}$
\item<4-> $\mathrm{SST}(x) = \sum_{i=1}^r \sum_{j=1}^s (x_{ij} - \bar{x}_{**})^2$;
\item<4-> $\mathrm{SSA}(x) = s\sum_{i=1}^r (\bar{x}_{i*} - \bar{x}_{**})^2$;
\item<4-> $\mathrm{SSE}(x) = \sum_{i=1}^r \sum_{j=1}^s (x_{ij} - \bar{x}_{i*})^2$;
\item<5-> $\mathrm{SST}(y) = \sum_{i=1}^r \sum_{j=1}^s (y_{ij} - \bar{y}_{**})^2$;
\item<5-> $\mathrm{SSA}(y) = s\sum_{i=1}^r (\bar{y}_{i*} - \bar{y}_{**})^2$;
\item<5-> $\mathrm{SSE}(y) = \sum_{i=1}^r \sum_{j=1}^s (y_{ij} - \bar{y}_{i*})^2$;
\item<6-> $\mathrm{SPT} = \sum_{i=1}^r \sum_{j=1}^s (x_{ij}-\bar{x}_{**})(y_{ij}-\bar{y}_{**})$;
\item<6-> $\mathrm{SPA} = s\sum_{i=1}^r (\bar{x}_{i*} - \bar{x}_{**})(\bar{y}_{i*} - \bar{y}_{**})$;
\item<6-> $\mathrm{SPE} = \sum_{i=1}^r \sum_{j=1}^s (x_{ij}-\bar{x}_{i*})(y_{ij} - \bar{y}_{i*})$.
\end{itemize}
\end{block}
}
\end{frame}
\begin{frame}[t]
\frametitle{One-way ANCOVA: Procedure}
\begin{enumerate}[<+->]
\item Compute the above statistics and group them into a table:
\begin{table}
\footnotesize
\begin{tabular}{l|c|c|c|c}
\hline
Variance & $\textrm{SS}(x)$ & $\textrm{SS}(y)$ & $\textrm{SP}$ & $\textrm{DF}$\\
\hline
inter-group & $\textrm{SSA}(x)$ & $\textrm{SSA}(y)$ & $\textrm{SPA}$ & $r-1$\\
intra-group & $\textrm{SSE}(x)$ & $\textrm{SSE}(y)$ & $\textrm{SPE}$ & $r(s-1)$\\
Total & $\textrm{SST}(x)$ & $\textrm{SST}(y)$ & $\textrm{SPT}$ & $rs-1$\\
\hline
\end{tabular}
\end{table}
\item Compute the intra-group regression coefficient $\beta = \frac{\textrm{SPE}}{\textrm{SSE}(x)}$;
\item If significant, adjust for the linear regression:
$$
\bar{y}_{i*}(x = \bar{x}_{**}) = \bar{y}_{i*} - \beta(\bar{x}_{i*} - \bar{x}_{**})
$$
\item $Q_E = \textrm{SSE}(y) - \frac{(\textrm{SPE})^2}{\textrm{SSE}(x)}$;
\item $Q_T = \textrm{SST}(y) - \frac{(\textrm{SPT})^2}{\textrm{SST}(x)}$;
\item $Q_A = Q_T - Q_E$;
\item $MQ_A = \frac{Q_A}{r-1}$; $MQ_E = \frac{Q_E}{r(s-1)-1}$
\item $F=\frac{MQ_A}{MQ_E} \sim F_{r-1, r(s-1)-1}$
\end{enumerate}
\end{frame}
\begin{frame}[t]
\frametitle{One-way ANCOVA: Procedure}
\uncover<1->{\begin{table}
\footnotesize
\begin{tabular}{l|c|c|c|c}
\hline
Variance & $\textrm{SS}(x)$ & $\textrm{SS}(y)$ & $\textrm{SP}$ & $\textrm{DF}$\\
\hline
inter-group & $\textrm{SSA}(x)$ & $\textrm{SSA}(y)$ & $\textrm{SPA}$ & $r-1$\\
intra-group & $\textrm{SSE}(x)$ & $\textrm{SSE}(y)$ & $\textrm{SPE}$ & $r(s-1)$\\
Total & $\textrm{SST}(x)$ & $\textrm{SST}(y)$ & $\textrm{SPT}$ & $rs-1$\\
\hline
\end{tabular}
\end{table}}
\uncover<2->{
\alert{\begin{center} $\Downarrow$ \end{center}}
}
\uncover<3->{\begin{table}
\footnotesize
\caption{Adjusted ANCOVA Table}
\begin{tabular}{l|c|c|c|c|c}
\hline
Variance & $\textrm{SS}$ & $\textrm{DF}$ & $\textrm{MSS}$ & $\textrm{F}$ & Significance\\
\hline
inter-group & $Q_A$ & $r-1$ & $MQ_A$ & $F$ & \\
intra-group & $Q_E$ & $r(s-1)-1$ & $MQ_E$ & & \\
Total & $Q_T$ & $rs-2$ & & & \\
\hline
\end{tabular}
\end{table}}
\uncover<4->{\begin{alertblock}{\center Conclusion}
\end{alertblock}}
\end{frame}
\begin{frame}[t]
\frametitle{Two-way ANCOVA: No Interaction}
\begin{itemize}[<+->]
\item Two factors $A: A_1, \dots, A_r$ and $B: B_1, \dots, B_s$;
\item Continuous covariate $X$;
\item Continuous outcome $Y$;
\end{itemize}
\uncover<4->{\begin{table}
\footnotesize
\caption{Adjusted ANCOVA Table Without Interaction}
\begin{tabular}{l|c|c|c|c|c}
\hline
Variance & $\textrm{SS}$ & $\textrm{DF}$ & $\textrm{MSS}$ & $\textrm{F}$ & Significance\\
\hline
inter-A & $Q_A$ & $r-1$ & $MQ_A$ & $F_A$ & \\
inter-B & $Q_B$ & $s-1$ & $MQ_B$ & $F_B$ & \\
intra-group & $Q_E$ & $(r-1)(s-1)-1$ & $MQ_E$ & & \\
Total & $Q_T$ & $rs-2$ & & & \\
\hline
\end{tabular}
\end{table}}
\end{frame}
\begin{frame}[t]
\frametitle{Two-way ANCOVA: With Interaction}
\begin{itemize}[<+->]
\item Two factors $A: A_1, \dots, A_r$ and $B: B_1, \dots, B_s$;
\item Continuous covariate $X$;
\item Continuous outcome $Y$;
\item Each cell with $m$ observations.
\end{itemize}
\uncover<5->{\begin{table}
\footnotesize
\caption{Adjusted ANCOVA Table With Interaction}
\begin{tabular}{l|c|c|c|c|c}
\hline
Variance & $\textrm{SS}$ & $\textrm{DF}$ & $\textrm{MSS}$ & $\textrm{F}$ & Significance\\
\hline
inter-A & $Q_A$ & $r-1$ & $MQ_A$ & $F_A$ & \\
inter-B & $Q_B$ & $s-1$ & $MQ_B$ & $F_B$ & \\
inter-AB & $Q_{AB}$ & $(r-1)(s-1)$ & $MQ_{AB}$ & $F_{AB}$ & \\
intra-group & $Q_E$ & $rs(m-1)-1$ & $MQ_E$ & & \\
Total & $Q_T$ & $rsm-2$ & & & \\
\hline
\end{tabular}
\end{table}}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Analysis of DBP Change from Baseline with ANCOVA}
\begin{itemize}
\item We now analyze the change from baseline in DBP at the
end of trial which is defined as ``diff''.
\item We start from the full model containing all
``covariates''.
\item Perform backward stepwise model selection to simplify
the model:
\end{itemize}
<<eval=TRUE>>=
# full model: TRT, Age and Sex with all of their interactions
m0 <- lm(diff ~ TRT * Age * Sex, data = dbp)
# stepwise selection starting from the full model
m1 <- step(m0)
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{ANCOVA Analysis of the Changes from Baseline}
<<eval=TRUE>>=
# reduced model: treatment effect adjusted for Age
m2 <- lm(diff ~ TRT + Age, data = dbp)
# ANOVA table of the reduced model
anova(m2)
@
\end{frame}
\subsection{Repeated Measure ANOVA}
\begin{frame}[t]
\frametitle{What is repeated-measure ANOVA?}
\begin{itemize}
\item<1-> The equivalent of the one-way ANOVA for correlated/non-independent groups
\begin{itemize}
\item Extension of paired $t$-test;
\item Within-subjects ANOVA;
\item ANOVA for correlated samples;
\end{itemize}
\item<2-> Here we only consider the one-way repeated measure ANOVA:
\begin{itemize}
\item One independent categorical variable (nominal or ordinal)
\item One continuous dependent variable (interval or ratio)
\end{itemize}
\end{itemize}
\only<2>{\begin{figure}
\includegraphics[width=0.50\textwidth]{images/repanova-time.png}
\end{figure}}
\only<3>{\begin{figure}
\includegraphics[width=0.50\textwidth]{images/repanova-treatment.png}
\end{figure}}
\uncover<4->{\begin{alertblock}{\center When to use repeated-measure ANOVA?}
\begin{itemize}
\item Changes in mean scores over three or more time points;
\item Differences in mean scores under three or more different conditions.
\end{itemize}
\end{alertblock}}
\end{frame}
\begin{frame}[t]
\frametitle{Logic of Repeated Measures ANOVA}
\begin{itemize}
\item<1-> ANOVA partitions total variability ($\textrm{SS}_T$) into between-groups
variability ($\textrm{SS}_b$) and within-groups variability ($\textrm{SS}_w$)
\item<2-> Within-group variability ($\textrm{SS}_w$) is defined as the error variability
($\textrm{SS}_{error}$).
\item<3-> Mean sum of squares for between-groups ($\textrm{MS}_b$) and within-groups
($\textrm{MS}_w$)
\item<4-> \alert{Independent ANOVA: $F = \frac{\textrm{MS}_b}{\textrm{MS}_w} = \frac{\textrm{MS}_b}{\textrm{MS}_{error}}$}
\item<5-> \alert{Repeated measures ANOVA: $F = \frac{\textrm{MS}_b}{\textrm{MS}_w} = \frac{\textrm{MS}_{condition}}{\textrm{MS}_{error}}$}
\item<6-> $\textrm{SS}_{error} = \textrm{SS}_w - \text{SS}_{subject} = \textrm{SS}_T - \textrm{SS}_{condition} - \textrm{SS}_{subject}$
\item<7-> $\textrm{MS}_{error} = \textrm{SS}_{error}/[(k-1)(n-1)]$, where $k$ is the number of groups (time points or treatments), and $n$ is
the number of subjects.
\item<8> $\textrm{SS}_{subject} = k \times \sum_{i=1}^n (\bar{x}_i - \bar{x})^2$, where $\bar{x}_i$ is the mean for subject $i$.
\end{itemize}
\only<4>{
\begin{figure}
\includegraphics[width=0.6\textwidth]{images/anova-partition-ss-ind.png}
\end{figure}}
\only<5>{
\begin{figure}
\includegraphics[width=0.6\textwidth]{images/repanova-partition-ss.png}
\end{figure}}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Repeated-measures ANOVA for DBP Trial}
<<eval=TRUE>>=
# wide -> long for the post-baseline visits only; DBP1 stays as a column
Dbp2 <- reshape(dbp,
                direction = "long",
                varying = paste0("DBP", 2:5),
                sep = "",
                idvar = c("Subject", "TRT", "Age", "Sex", "DBP1"))
row.names(Dbp2) <- NULL
head(Dbp2)
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Repeated-measures ANOVA for DBP Trial}
<<eval=TRUE>>=
# Subject and time must be factors: a numeric id inside Error() would be
# used as a covariate instead of defining the within-subject strata
Dbp2$Subject <- factor(Dbp2$Subject)
Dbp2$time <- factor(Dbp2$time)
# repeated-measures ANOVA adjusted for baseline DBP1
m2 <- aov(DBP ~ DBP1 + TRT*time + Error(Subject/time), data=Dbp2)
summary(m2)
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Repeated-measures ANOVA: Pros}
\begin{itemize}
\item Lower cost
\begin{itemize}
\item Fewer samples needed
\end{itemize}
\item Increased power
\begin{itemize}
\item Between-subject variation is removed from the error term.
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Repeated-measures ANOVA: Cons}
\begin{itemize}
\item Order effects
\item Missing data issues
\item Sphericity assumption
\end{itemize}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Sphericity Assumption of Repeated-measures ANOVA}
\begin{itemize}
\item The variances of the differences between all possible
pairs of groups (i.e., levels of the independent
variable) are equal.
\item Similar to homogeneity of variance assumption.
\item Concerned with difference scores.
\end{itemize}
\begin{alertblock}{Testing for sphericity}
Mauchly's test, similar to Levene's test.
\begin{lstlisting}
mauchly.test(anova.result)
\end{lstlisting}
The common solution of deviation is to adjust for degrees of freedom.
\begin{itemize}
\item Greenhouse-Geisser correction.
\end{itemize}
\end{alertblock}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{\texttt{aov()} function}
\begin{lstlisting}[language=R]
aov(formula, data=data.frame)
\end{lstlisting}
\begin{table}
\caption{Special Symbols}
\small
\begin{tabular}{p{0.2\textwidth}p{0.70\textwidth}}
Symbol & Usage\\
\hline
\verb|~| & The delimiter between response variable and explanatory variables. \\
\verb|+| & The delimiter between explanatory variables.\\
\verb|:| & The interaction between explanatory variables.\\
\verb|*| & All possible interaction terms.\\
\verb|^| & The highest order of interaction.\\
\verb|.| & All the independent variables.\\
\hline
\end{tabular}
\end{table}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{ANOVA Design Settings}
\begin{table}
\caption{ANOVA Design}
\begin{tabular}{p{0.3\textwidth}p{0.60\textwidth}}
\hline
Expression & Meaning\\
\hline
\verb#y ~ A# & One-way ANOVA. \\
\verb#y~x+A# & One-way ANCOVA with one covariate $x$.\\
\verb#y~A*B# & Two-way ANOVA for factorial design.\\
\verb#y~x1+x2+A*B# & Two-way ANCOVA with two covariates in a factorial design.\\
\verb#y~B+A# & Randomized block design ($B$ is the block factor).\\
\verb#y ~ A + Error(Subject)# & Repeated measures ANOVA design.\\
\hline
\end{tabular}
\end{table}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Pearson's Chi-square test}
\framesubtitle{Comparing categorical outcomes across different treatment groups}
\begin{itemize}
\item Setting: $m$ treatments, $n$ categories of outcomes
\item Compute the test statistic:
$$
\chi^2 = \sum_{i} \frac{(O_i - E_i)^2}{E_i} \sim \chi^2(df=(m-1)(n-1))
$$
\item Use Yates' continuity correction when
the expected frequencies are too low:
$$
\chi_{yates}^2 = \sum_{i} \frac{(|O_i - E_i|-0.5)^2}{E_i} \sim \chi^2 (df=(m-1)(n-1))
$$
\item Reject $H_0$ if $\chi^2 > \chi^2(\alpha, df=(m-1)(n-1))$
\end{itemize}
\begin{lstlisting}
prop.test(..., correct=FALSE)
\end{lstlisting}
\end{frame}
\section{Nonparametric tests}
\begin{frame}[t]
\frametitle{Nonparametric tests}
\begin{itemize}
\item 2-groups: Mann-Whitney rank-sum test for continuous outcome in completely randomized
parallel design.
\item 2-groups: Wilcoxon signed rank test for continuous outcome in crossover design.
\item 3+-groups: Kruskal-Wallis rank-sum test for continuous outcome in completely
randomized parallel design.
\item 3+-groups: Friedman's rank sum test for continuous outcome in randomized
block design.
\item 2-groups: McNemar's test for binary outcome in crossover design.
\item 3+-groups: Cochran's Q-test for binary outcome in crossover design.
\end{itemize}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Wilcoxon-Mann-Whitney test}
\framesubtitle{Comparing two treatment groups with non-normal data}
\begin{itemize}
\item The $t$-test is usually quite robust against departures
from normality.
\item However, when the departure is extreme, use
Mann-Whitney-Wilcoxon U-test (a.k.a Wilcoxon
rank-sum test)
\item Proposed by Wilcoxon (1945) for equal sample sizes
\item Extended to arbitrary sample sizes by Mann and Whitney
(1947)
\item Virtually identical to performing ordinary parametric
two-sample t-test on the combined ranks.
\end{itemize}
\begin{lstlisting}
wilcox.test(...)
\end{lstlisting}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Wilcoxon rank-sum test}
\framesubtitle{Nonparametric version of t-test}
When the assumptions of normality and equal variances are violated, we
may use a nonparametric test --- the Wilcoxon rank-sum test.
<<eval=TRUE>>=
# Wilcoxon rank-sum test of diff between the two treatments
wilcox.test(diff ~ TRT, data=dbp)
@
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Kruskal-Wallis Test}
\framesubtitle{3+-groups of continuous outcome in randomized parallel design}
\begin{itemize}
\item Kruskal-Wallis H test; One-way ANOVA on ranks
\item Extension of Mann-Whitney U test
\end{itemize}
\begin{alertblock}{\center Method}
\begin{enumerate}
\item Rank all data from all groups together
\item Compute the statistic:
$$
H = (N-1)\frac{\sum_{i=1}^g n_i (\bar{r}_{i*} - \bar{r})^2}{\sum_{i=1}^g \sum_{j=1}^{n_i} (r_{ij} - \bar{r})^2} \sim \chi^2(df=g-1)
$$
where
\begin{itemize}
\item $n_i$: number of observations in group $i$;
\item $r_{ij}$: rank (among all) of observation $j$ from group $i$;
\item $N$: total number of observations across all groups;
\item $\bar{r}_{i*} = \frac{\sum_{j=1}^{n_i} r_{ij}}{n_i}$ is the average rank of all observations in group $i$;
\item $\bar{r} = \frac{N+1}{2}$ is the average of all $r_{ij}$
\end{itemize}
\end{enumerate}
\end{alertblock}
\begin{lstlisting}
kruskal.test(...)
\end{lstlisting}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Friedman's Q test}
\framesubtitle{3+-groups of continuous outcome in randomized block design}
\begin{itemize}
\item randomized block design (随机区组设计)
\item not necessarily symmetric (不需要满足对称假设)
\item Two-way ANOVA by ranks?
\end{itemize}
\begin{block}{\center \small Method}
\begin{itemize}
\small
\item Given data $\{x_{ij}\}_{n \times k}$ with $n$ rows (blocks) and $k$ columns (treatments);
\item Replace the data with new matrix $\{r_{ij}\}$, where $r_{ij}$ is the rank of $x_{ij}$ within
block $i$ ($r_{ij}=1,\dots,k$).
\item Calculate the values:
\begin{itemize}
\item $\bar{r}_{*j} = \frac{1}{n} \sum_{i=1}^n r_{ij}$
\item $\bar{r} = \frac{1}{nk}\sum_{i=1}^n \sum_{j=1}^k r_{ij}$
\item $\textrm{SS}_t = n \sum_{j=1}^k (\bar{r}_{*j} - \bar{r})^2$
\item $\textrm{SS}_e = \frac{1}{n(k-1)} \sum_{i=1}^n \sum_{j=1}^k (r_{ij} - \bar{r})^2$
\end{itemize}
\item Calculate the test statistic $Q = \frac{\textrm{SS}_t}{\textrm{SS}_e}$. $Q$ does not need
to be adjusted for tied values in the data.
\item If $n>15$ or $k>4$, $Q \sim \chi^2_{df=k-1}$
\end{itemize}
\end{block}
\begin{lstlisting}
friedman.test(...)
\end{lstlisting}
\end{frame}
\begin{frame}[t]
\frametitle{Cochran's Q test}
\framesubtitle{2+ correlated groups of dichotomous outcomes}
\begin{itemize}
\item Similar to Friedman's test, but with binary outcomes.
\item Randomized block design.
\item $H_0: $ The treatments are all equally effective.
\end{itemize}
\begin{block}{\center \small Method}
The test statistic
$$
T = k(k-1) \frac{\sum_{j=1}^k (X_{*j} - \frac{N}{k})^2}{\sum_{i=1}^b X_{i*}(k - X_{i*})} \sim \chi^2_{df=k-1}
$$
where
\begin{itemize}
\item $k$: number of treatments;
\item $X_{*j}$: total for $j^{th}$ treatment;
\item $b$: number of blocks;
\item $X_{i*}$: row total for the $i^{th}$ block;
\item $N$: grand total
\end{itemize}
\end{block}
\end{frame}
\section{Permutation or Resampling Approaches}
\begin{frame}[t,containsverbatim]
\frametitle{Permutation method}
\begin{enumerate}
\item Iteratively permute the randomization (treatment assignment)
\item Compute the statistic of interest for each permuted sample
\item Generate the empirical distribution of the statistics
\item Compare the original statistic with the permuted ones to determine
the empirical $p$-value
\item Reject or not reject the null hypothesis
\end{enumerate}
\begin{alertblock}{\center \small Exercise}
Use \texttt{sample()} function to realize the permutation approach.
\end{alertblock}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Bootstrapping method}
Bootstrapping is a resampling procedure extensively used in statistics
when any of the assumptions underlying the validity of the $t$-test
don't hold for the data under analysis.
\begin{enumerate}
\item Iteratively drawing samples with replacement from
the data.
\item Calculating the statistic of interest for each sample
\item Generating the empirical resampling distribution of
the statistic
\item Percentile points corresponding to the Type-I error level
and the sided-ness of the alternative hypothesis of the
resampling distribution are then used in the assessment
of statistical significance.
\end{enumerate}
\begin{lstlisting}
bootstrap::bootstrap()
\end{lstlisting}
\end{frame}
\begin{frame}[t,containsverbatim]
\frametitle{Bootstrapping approach}
<<eval=TRUE>>=
library(bootstrap)
# statistic for one resample: mean of diff in the first TRT group minus
# the mean in the second, computed on the resampled row indices bn
mean.diff <- function(bn, dbp) {
  arm.means <- tapply(dbp$diff[bn], dbp$TRT[bn], mean)
  -diff(arm.means)
}
# number of bootstrap replicates
nboot <- 1000
# resample the row indices of dbp and evaluate mean.diff on each resample
boot.mean <- bootstrap(seq_len(nrow(dbp)), nboot, mean.diff, dbp)
# replicates of the mean difference
x <- boot.mean$thetastar
# 2.5% and 97.5% percentiles of the replicates
quantile(x, probs = c(0.025, 0.975))
@
\begin{alertblock}{\center \small Conclusion}
What conclusion can you make from the above result?
\end{alertblock}
\end{frame}
\end{CJK*}
\end{document}