-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathOR.R
109 lines (98 loc) · 3.32 KB
/
OR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
odds.ratio <- function( two_by_two ) {
this <- environment()
this$table <- two_by_two
this$fisher <- fisher.test(two_by_two)
a <- two_by_two[1,1]
b <- two_by_two[1,2]
c <- two_by_two[2,1]
d <- two_by_two[2,2]
this$Sensitivity <- (a)/(a + c)
this$Specificity <- (d)/(d + b)
this$OR <- (a * d)/(b * c)
siglog <- sqrt( ( 1/a ) + ( 1/b ) + ( 1/c ) + ( 1/d ) )
zalph <- qnorm( 0.975 )
logOR <- log( OR )
loglo <- logOR - zalph * siglog
loghi <- logOR + zalph * siglog
this$OR_low <- exp( loglo )
this$OR_high <- exp( loghi )
df.two_by_two <- as.data.frame( two_by_two )
exposure.pos <- paste( colnames( df.two_by_two )[1], as.character( df.two_by_two[1,1] ) )
exposure.neg <- paste( colnames( df.two_by_two )[1], as.character( df.two_by_two[2,1] ) )
outcome <- paste( colnames( df.two_by_two )[2], as.character( df.two_by_two[1,2] ) )
stmt1 <- 'The odds of'
stmt2 <- outcome
stmt3 <- 'is'
stmt4 <- paste(this$OR, '(', this$OR_low, '-', this$OR_high, ')')
stmt5 <- 'given'
stmt6 <- exposure.pos
stmt7 <- 'compared to'
stmt8 <- exposure.neg
stmt8.5 <- paste('(Fisher\'s test p-value:', this$fisher$p.value, ')')
stmt9 <- '. the sensitivity and specificity are'
stmt10 <- this$Sensitivity
stmt10.5 <- 'and'
stmt11 <- this$Specificity
stmt12 <- 'respectively for'
stmt13 <- exposure.pos
stmt14 <- 'on'
stmt15 <- outcome
this$Statement <- paste( stmt1, stmt2, stmt3, stmt4, stmt5, stmt6, stmt7, stmt8, stmt8.5, stmt9, stmt10, stmt10.5, stmt11, stmt12, stmt13, stmt14, stmt15)
return(this)
}
runner <- function(data, possible_vars, constant_var) {
for (i in attributes(d.split)$names) {
for (poss_var in possible_vars) {
tab <- table(data[[i]][,poss_var],data[[i]][,constant_var])
ft <- fisher.test(tab)
o <- odds.ratio(tab)
writeLines(c('Table for',i,'Looking at', poss_var))
print(tab)
writeLines(c('Fisher test p-value:',ft$p.value))
writeLines(c('Odds ratio:', o$this$Statement))
}
}
}
guess_status <- function(guess_col, cutoff = 0.0000001) {
ver <- as.numeric(as.character(guess_col))
ver[which(ver > cutoff)] <- 1
ver[which(ver <= cutoff)] <- 0
ver <- as.character(ver)
ver[which(ver == "1")] <- "pos"
ver[which(ver == "0")] <- "neg"
return(as.factor(ver))
}
double_positive <- function(col1, col2) {
t <- cbind.data.frame(col1, col2)
ot <- rep("neg",length(col1))
ot[which(t[,1] == "pos" & t[,2] == "pos")] <- "pos"
return(as.factor(ot))
}
pick_dominant_add_total <- function(df, cutoff = 0.5) {
for (i in 1:length(colnames(df))) {
df[,i] <- as.numeric(as.character(df[,i]))
}
row.totals <- rowSums(df)
max.abund <- c()
max.abund.name <- c()
doms <- c()
for (i in 1:length(df[,1])) {
thisrow <- df[i,]
thismax <- max(thisrow)
thismaxname <- colnames(thisrow[which(thisrow == thismax)])[1]
max.abund <- c(max.abund, thismax)
max.abund.name <- c(max.abund.name, thismaxname)
thisprop <- thismax/row.totals[i]
if (thisprop >= cutoff) {
doms <- c(doms, thismaxname)
}
else {
doms <- c(doms, 'None')
}
}
df$total_seqs <- row.totals
df$max_abund <- max.abund
df$max_abund_name <- max.abund.name
df$dominant <- as.factor(doms)
return(df)
}