-
Notifications
You must be signed in to change notification settings - Fork 0
/
Growth
360 lines (271 loc) · 13 KB
/
Growth
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
```{r}
library(tidyverse)
library(ggpubr)
library(dplyr)
library(ggplot2)
library(grid)
library(fBasics)
library(tidyr)
library(reshape2)
library(agricolae)
library(Rmisc)
library(gdata)
library(rlist)
library("ggsci")
library("stringr")
library(vegan)
library(devtools)
library(rstatix)
library(lubridate)
#assign spp. colors
group.colors <- c("C.latusorum"= "#8b5e3c", "D.glynnii"= "#fbb040")
#read in data
#linear extension data
linextension<-fread("linear_extension.csv", header = TRUE)
lin<-na.omit(linextension)
#calcification data
calc<- fread("calcification_data.csv", header = TRUE)
calc_na<-na.omit(calc)
#buoyant weight data
bw<- fread("buoyant_weight.csv", header = TRUE)
bw_na<-na.omit(bw)
##################### SUMMARIZE DATA ######################
#Summarize bw data
bw_sum_change<-summarySE(bw_na, measurevar="percent_change_from_July2008", groupvars=c("sym.spp", "time_point"))
bw_sum<-summarySE(bw_na, measurevar="weight", groupvars=c("sym.spp", "time_period"))
calc_sum<-summarySE(calc, measurevar="G", groupvars=c("sym.spp"))
# sym.spp N G sd se ci
#1 C 6 0.7333333 0.1758029 0.07177124 0.1844938
#2 D 5 0.5700000 0.1328533 0.05941380 0.1649592
linex_sum<-summarySE(lin, measurevar="Difference", groupvars=c("sym.spp"))
#sym.spp N Difference sd se ci
#1 C 36 10.022222 5.360176 0.8933627 1.813623
#2 D 26 8.703846 6.801087 1.3338029 2.747018
bw_na %>%
group_by(sym.spp, time_period) %>%
get_summary_stats(percent_change_from_July2008, type = "mean_ci")
# A tibble: 4 × 6
# sym.spp time_period variable n mean sd
# <chr> <chr> <chr> <dbl> <dbl> <dbl>
#1 C June 5 2009 percent_change_from_July2008 44 262. 111.
#2 C Sep 24 2008 percent_change_from_July2008 44 41.6 24.9
#3 D June 5 2009 percent_change_from_July2008 33 264. 152.
#4 D Sep 24 2008 percent_change_from_July2008 33 40.7 29.5
bw_na %>%
group_by(sym.spp, time_point) %>%
get_summary_stats(weight, type = "mean_ci")
# A tibble: 4 × 6
# sym.spp time_point variable n mean sd
# <chr> <chr> <chr> <dbl> <dbl> <dbl>
#1 C t1 weight 44 29.9 8.44
#2 C t2 weight 44 76.7 27.7
#3 D t1 weight 33 28.4 10
#4 D t2 weight 33 73.7 36.5
############ Data visualization and analysis for linear extension and calcification data #################
# dot plot for lin extension data
lplot<-ggplot(lin, aes(x=sym.spp,y=Difference,fill=sym.spp)) +
geom_jitter()
#Boxplot linear extension data
linex_boxplot<-ggplot(data=lin)+
geom_boxplot(mapping=aes(x=sym.spp,y=Difference,fill=sym.spp), col=I("black"))+
labs(x="Symbiont species",y=bquote("Change in branch length(inches)" ))+
theme_classic()+
theme(legend.position="none", aspect.ratio = 3)+
scale_fill_manual(values=group.colors)
#Boxplot for calcification data
calc_boxplot<-ggplot(data=calc)+
geom_boxplot(mapping=aes(x=sym.spp,y=G,fill=sym.spp), col=I("black"))+
labs(x="Symbiont species",y=bquote("G" ))+
theme_classic()+
theme(legend.position="none", aspect.ratio = 3)+
scale_fill_manual(values=group.colors)
#theme(legend.position="right") +
# labels = c('June 2009', 'September 2009', 'July 2008'))
# Testing normality of calcification data for D. glynnii colonies
with(calc, shapiro.test(G[sym.spp == "D.glynnii"]))# p = 0.7785
# Shapiro-Wilk normality test for C. latusorum colonies
with(calc, shapiro.test(G[sym.spp == "C.latusorum"])) # p = 0.8023
#T test of calcification data for D. glynnii colonies
t.test(G ~ sym.spp, data=calc, alternative = "two.sided", var.equal = TRUE)
#p-value = 0.1223
#95 percent confidence interval:
# -0.05331589 0.37998256
#sample estimates:
#mean in group C mean in group D
# 0.7333333 0.5700000
# Testing normality of linear extension data for D. glynnii
with(lin, shapiro.test(Difference[sym.spp == "D.glynnii"]))# p = 0.003258
# Shapiro-Wilk normality test for C. latusorum
with(lin, shapiro.test(Difference[sym.spp == "C.latusorum"])) # p = 0.04909
#data are not normally distributed
# Non-parametric Wilcoxen test on linear extension data
wilcox.test(Difference ~ sym.spp, data = lin,
exact = FALSE)
#p-value = 0.1664
########################### Plot visualizations of bw data #################################
# boxplot of bw over time
bxp <- ggboxplot(
bw, x = "time_point", y = "weight",
color = "sym.spp"
)
bxp+ labs(x="Time",y=bquote("Buoyant weight over time" ))+
theme_classic()+
theme(legend.position="none", aspect.ratio = 1.5)+
scale_fill_manual(values=group.color)
#boxplot of percent change in buoyant weight since July 2008 (t0)
bxp <- ggboxplot(
bw, x = "time_point", y = "percent_change_from_July2008",
color = "sym.spp"
)
bxp+ labs(x="Time",y=bquote("Percent change in buoyant weight since July 2008" ))+
theme_classic()+
theme(legend.position="none", aspect.ratio = 1.5)+
scale_fill_manual(values=group.color)
bwplot<-ggplot(bw_na, aes(x=sym.spp, y=bw, group=time_point, fill=time_point))+
geom_point(bw_na, aes(x=sym.spp, y=percent_change_from_July2008, color=sym.spp), shape=1, alpha=0.3, position=pd)+
scale_color_manual(values=group.colors)+
theme_bw()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
geom_hline(yintercept=0, color='black', linetype='dashed')+
ylab('Percent Change in BW from June 2008')
#geom_errorbar(aes(x=sym.spp, ymin=BWlowerci, ymax=BWupperci, color=time_period), position=pd, lwd=0.5, width=0.3)+
geom_jitter(position = position_jitter(width = 0.2, height = 0.1),aes(shape = time_period, alpha=0.7, color='gray'))+
scale_shape_manual(values = c(23,22,21))+
geom_point(data = bw_sum, size = 4, aes(shape= time_period, fill=sym.spp))+
theme_classic()+
geom_errorbar(aes(ymin=weight-sd, ymax=weight+sd), width=.2,
position=position_dodge(.9))+
stat_summary(fun.data=bw_sum, fun.args = list(mult=1),
geom="errorbar", color="red", width=0.2) +
stat_summary(fun=mean, geom="point", color="red")
#line graph of percent change in buoyant weight since July 2008 (t0)
line_bw<- ggplot(bw_sum,aes(x=time_point,y=percent_change_from_July2008,group=sym.spp, color=sym.spp))+
geom_line()+
theme_classic()+
geom_errorbar(aes(ymin=percent_change_from_July2008-ci, ymax=percent_change_from_July2008+ci), width=.1)+
scale_fill_manual(group.colors)
################### linear mixed model testing of bw change by spp and frag as random effect #####################
bw_lmer<- lmer(percent_change_from_July2008 ~ sym.spp * time_period + (1|colony), data=bw_na)
step(bw_lmer, reduce.random = F) #tests model factors by AIC
anova(bw_lmer)
library(emmeans)
#post hoc comparisons with emmeans
print(emmeans(bw_lmer, list(pairwise ~ sym.spp|time_period)), adjust = c('mvt'))
print(emmeans(bw_lmer, list(pairwise ~ time_period|sym.spp)), adjust = c('mvt'))
print(emmeans(bw_lmer, list(pairwise ~ colony|sym.spp)), adjust = c('mvt'))
bw_na %>%
group_by(sym.spp,time_period) %>%
identify_outliers(percent_change_from_July2008)
###CHECK ASSUMPTIONS
#check for outliers
bw %>%
group_by(time_point, sym.spp) %>%
identify_outliers(weight)
# A tibble: 7 × 8
# time_period colony sym.spp time_point weight percent_change_from_July2008 is.outlier is.extreme
# <chr> <chr> <chr> <chr> <dbl> <dbl> <lgl> <lgl>
#1 C t0 39B July 2008 32.6 NA TRUE FALSE
#2 D t0 29 July 2008 28.7 NA TRUE FALSE
#3 D t0 63 July 2008 28.4 NA TRUE FALSE
#4 D t0 104 July 2008 32.3 NA TRUE FALSE
#5 D t1 63 Sep 24 2008 57.8 103. TRUE FALSE
#6 D t1 104 Sep 24 2008 58.5 81.3 TRUE FALSE
#7 D t2 63 June 5 2009 178. 526. TRUE FALSE
bw %>%
group_by(time_period, sym.spp) %>%
identify_outliers(log_bw)
# A tibble: 6 × 8
# sym.spp time_period colony time_point weight percent_change_from_July2008 is.outlier is.extreme
# <chr> <chr> <chr> <chr> <dbl> <dbl> <lgl> <lgl>
#1 D June 5 2009 67 t2 121. 672. TRUE FALSE
#2 D June 5 2009 ss103 t2 142. 667. TRUE FALSE
#3 C Sep 24 2008 53 t1 46.1 126. TRUE FALSE
#4 D Sep 24 2008 27 t1 15.5 -22.5 TRUE FALSE
#5 D Sep 24 2008 61 t1 36.1 109. TRUE FALSE
#6 D Sep 24 2008 63 t1 57.8 103. TRUE FALSE
#there are outliers, but no extreme outliers. Further analyzing with a qqplot since sample size is >50.
#check normality
bw %>%
group_by(time_point, sym.spp) %>%
shapiro_test(weight)
#1 C t0 weight 0.948 0.0446
#2 D t0 weight 0.886 0.00232
#3 C t1 weight 0.926 0.00737
#4 D t1 weight 0.821 0.0000843
#5 C t2 weight 0.966 0.213
#6 D t2 weight 0.908 0.00846
#these data are not normally distributed (except for C. latusorum at time 2)
bw %>%
group_by(time_period, sym.spp) %>%
shapiro_test(percent_change_from_July2008)
# time_point variable statistic p
# <chr> <chr> <dbl> <dbl>
#1 t0 weight 0.929 0.000342
#2 t1 weight 0.903 0.0000236
#3 t2 weight 0.958 0.0123
#these data are not normally distributed, but sample size is large.
#so, check normality in qq plot
ggqqplot(bw, "weight", ggtheme = theme_bw())+
facet_grid(time_point~sym.spp, labeller ="label_both")
ggqqplot(bw_na, "percent_change_from_July2008", ggtheme = theme_bw())+
facet_grid(time_period~sym.spp, labeller ="label_both")
#Data are falling approximately on the reference line. assuming normality. test with transformed data (individually)
#create new df without outliers to rerun stats
bw_no_outliers<- bw[-c(155,157,160,161,162,169,175,181,189,193,194, 199, 203, 210, 214, 215, 220, 225), ]
bw_na_no_outliers<- bw_na[-c(215,225), ]
#log transform
bw_na$log_bw <- log10(bw_na$percent_change_from_July2008)
#square root transform
bw_na$sqrt_bw <- sqrt(bw_na$percent_change_from_July2008)
#cube root transform
bw_na$cube_y <- bw_na$percent_change_from_July2008^(1/3)
no_na_bw<-na.omit(bw_na)
#when tested each, no significant deviations from non-transformed data.
#identify and remove outliers and test normality
bw %>%
group_by(time_point, sym.spp) %>%
identify_outliers(weight)
bw_na_no_outliers %>%
group_by(time_period, sym.spp) %>%
identify_outliers(weight)
bw %>%
group_by(time_point, sym.spp) %>%
identify_outliers(weight)
ggqqplot(bw_no_outliers, "weight", ggtheme = theme_bw())+
facet_grid(time_point~sym.spp, labeller ="label_both")
ggqqplot(bw_na_no_outliers, "percent_change_from_July2008", ggtheme = theme_bw())+
facet_grid(time_period~sym.spp, labeller ="label_both")
###################ANOVA test and Mauchly's test of sphericity for bw data #################
# bw over time
res.aov <- aov(weight ~ time_point + sym.spp + time_point:sym.spp, data = bw)
summary(res.aov)
# Df Sum Sq Mean Sq F value Pr(>F)
#time_point 2 133620 66810 180.077 <2e-16 ***
#sym.spp 1 197 197 0.530 0.468
#time_point:sym.spp 2 35 17 0.047 0.954
#Residuals 225 83477 371
#---
#Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#bw percent change from t0
res.aov2 <- aov(percent_change_from_July2008 ~ time_period + sym.spp +time_period:sym.spp, data = bw_na)
summary(res.aov2)
# Df Sum Sq Mean Sq F value Pr(>F)
#time_period 1 1891364 1891364 213.184 <2e-16 ***
#sym.spp 1 9 9 0.001 0.975
#time_period:sym.spp 1 70 70 0.008 0.929
#Residuals 150 1330799 8872
#---
#Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#For each way of analyzing these buoyant weight data, there are no differences between colonies with either symbiont spp.
#ANOVA Table (type III tests)
# Effect DFn DFd F p p<.05 ges
# time_period 1 76 292.16 9.34e-28 * 0.587
#Mauchly’s test p-value is significant, the variance is not equally distributed
# pairwise comparisons
pwc <- bw %>%
pairwise_t_test(
weight ~ time_period, paired = TRUE,
p.adjust.method = "bonferroni"
)
pwc
anova_test(bw$weight)