source('~/Google Drive File Stream//My Drive/Work/Microarrays/2014 miRNA Arrays/Final Analysis/Scripts/Custom functions.R', chdir = TRUE)
dataset <- read.csv("~/Google Drive File Stream/My Drive/miRNA/dataset.csv")
library(pacman)
p_load("kableExtra","caret","tidyverse","reshape2","e1071","JamesTools","OptimalCutpoints","Boruta","ggplot2","randomForest","ROCR","rpart","party","rpart.plot","partykit","glmnet","xgboost","Ringo", "parallel", "doParallel", "DMwR", "cowplot", "pROC", "readxl")
rownames(dataset) <- dataset$X
dataset<- select(dataset, -X)
NTproBNP <- as.data.frame(read_excel("~/Google Drive File Stream/My Drive/miRNA/NTproBNP.xlsx"))
pheno <- read_excel("~/Google Drive File Stream/My Drive/miRNA/NTproBNP.xlsx",
sheet = "Sheet3")
ml.Spear <- dataset[dataset$AB == "A",] %>% select(.,-PHstatus,-AB)
ml.Spear$group <- as.factor(ml.Spear$group)
setB<- dataset[dataset$AB == "B",] %>% select(.,-PHstatus,-AB)
setB$group <- as.factor(setB$group)
setB2 <- setB
Boruta is a 'wrapper algorithm for all relevant feature selection'.
#run boruta: each call performs one full Boruta run and returns the miRNAs confirmed as important
multi<- function(i){
fit.boruta <- Boruta(factor(group)~., data=ml.Spear, maxRuns = 300, pValue = 0.01)
boruta.df <- data.frame(attStats(fit.boruta))
rownames(boruta.df[boruta.df$decision =='Confirmed',])
}
set.seed(100)
multi.boruta<- parallel::mclapply(1:100, multi)
#See how many times each miRNA appears when boruta is run 100x
multi.boruta<- as.data.frame(table(unlist(multi.boruta)))
multi.boruta.mirs<- as.character(multi.boruta[which(multi.boruta$Freq>10),1])
multi.boruta
## Var1 Freq
## 1 let.7d.3p 100
## 2 miR.125a.5p 5
## 3 miR.187.5p 100
## 4 miR.18b.5p 36
## 5 miR.33b.3p 2
## 6 miR.3613.3p 100
## 7 miR.451a 2
## 8 miR.4707.5p 100
## 9 miR.572 100
## 10 miR.636 100
## 11 miR.652.3p 100
## 12 miR.671.5p 97
## 13 miR.933 100
ml.Spear$group <- as.factor((ml.Spear$group))
m<- paste(as.vector(multi.boruta.mirs, mode = "any"), collapse = "+")
RFm<- as.formula(paste("group ~ ",m,sep = ""))
Boruta.data<- ml.Spear[,c("group",multi.boruta.mirs)]
tree.Boruta<- setB[,c("group", multi.boruta.mirs)]
Using caret to train: 10-fold cross-validation, with 10 repeats.
mtry
- number of variables randomly sampled as candidates at each split (default is sqrt(no. of variables))
ntree
- number of trees to grow
ntree and mtry can't be optimised in the same caret run, so optimise ntree first, using the default mtry (a quick sketch of that default follows, then the ntree tuning code):
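As a quick aside, a minimal sketch (using the Boruta.data frame built above) of the randomForest default mtry for a classification problem, which is the square root of the number of predictors:
#default mtry for classification is floor(sqrt(no. of predictors)); with the 10 Boruta miRs this is 3, inside the 1-4 grid searched below
floor(sqrt(ncol(Boruta.data) - 1))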
control<- trainControl(method = 'repeatedcv',
number = 10,
repeats = 10, classProbs = TRUE, savePredictions = TRUE)
metric <- "Accuracy"
tunegrid <- expand.grid(.mtry=seq(from =1, to =4))
modellist<- list()
for (ntree in c(100, 250, 500, 750, 1000, 1250, 1500)) {
set.seed(100)
fit <- caret::train(RFm, data=Boruta.data, method="rf", metric=metric, tuneGrid=tunegrid, trControl=control, ntree=ntree)
key <- toString(ntree)
modellist[[key]] <- fit
}
# compare results
results <- resamples(modellist)
res<- summary(results)
res<- as.data.frame(res$statistics$Accuracy)
summary(results)
##
## Call:
## summary.resamples(object = results)
##
## Models: 100, 250, 500, 750, 1000, 1250, 1500
## Number of resamples: 100
##
## Accuracy
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 100 0.5714286 0.7142857 0.8571429 0.8320238 1 1 0
## 250 0.5714286 0.7142857 0.8571429 0.8373214 1 1 0
## 500 0.5714286 0.7142857 0.8571429 0.8410714 1 1 0
## 750 0.5714286 0.7142857 0.8571429 0.8426786 1 1 0
## 1000 0.5714286 0.7142857 0.8571429 0.8443452 1 1 0
## 1250 0.5714286 0.7142857 0.8571429 0.8443452 1 1 0
## 1500 0.5714286 0.7142857 0.8571429 0.8443452 1 1 0
##
## Kappa
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 100 0.00000000 0.4086538 0.6956522 0.6376523 1 1 0
## 250 0.00000000 0.4086538 0.6956522 0.6469564 1 1 0
## 500 0.00000000 0.4086538 0.6956522 0.6548932 1 1 0
## 750 0.00000000 0.4086538 0.6956522 0.6578694 1 1 0
## 1000 0.08695652 0.4166667 0.6956522 0.6635938 1 1 0
## 1250 0.08695652 0.4166667 0.6956522 0.6635938 1 1 0
## 1500 0.08695652 0.4166667 0.6956522 0.6635938 1 1 0
Now optimise mtry:
ntree = as.numeric(rownames(res)[which(res$Mean == max(res$Mean))])[1]
set.seed(100)
tunegrid <- expand.grid(.mtry=seq(from = 1, to=4, by = 0.5))
modellist<- list()
control<- trainControl(method = 'repeatedcv',
number = 10,
repeats = 10, classProbs = TRUE, savePredictions = TRUE)
set.seed(100)
fit <- caret::train(RFm, data=Boruta.data, method="rf", metric="Accuracy", tuneGrid=tunegrid, trControl=control, ntree=ntree)
fit
## Random Forest
##
## 71 samples
## 10 predictors
## 2 classes: 'HV', 'PH'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 10 times)
## Summary of sample sizes: 64, 63, 64, 64, 64, 65, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 1.0 0.8400595 0.6546686
## 1.5 0.8193452 0.6146054
## 2.0 0.8160714 0.6057677
## 2.5 0.8236310 0.6243189
## 3.0 0.8050000 0.5864137
## 3.5 0.7912500 0.5607784
## 4.0 0.8004762 0.5739003
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 1.
paste("ntree used:", ntree)
## [1] "ntree used: 1000"
tunegrid <- expand.grid(.mtry=fit$bestTune$mtry)
control<- trainControl(method = 'repeatedcv',
number = 10,
repeats = 10,
savePredictions = TRUE,
classProbs = TRUE)
fit.Boruta <- caret::train(RFm, data=Boruta.data, method="rf", metric="Accuracy", tuneGrid=tunegrid, trControl=control, ntree=ntree)
RFBoruta.train <- predict(fit.Boruta, tree.Boruta)
BorutaInterim<- confusionMatrix(as.factor(RFBoruta.train), as.factor(tree.Boruta$group), positive = "PH")
BorutaInterim
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 10 3
## PH 4 19
##
## Accuracy : 0.8056
## 95% CI : (0.6398, 0.9181)
## No Information Rate : 0.6111
## P-Value [Acc > NIR] : 0.01065
##
## Kappa : 0.5855
##
## Mcnemar's Test P-Value : 1.00000
##
## Sensitivity : 0.8636
## Specificity : 0.7143
## Pos Pred Value : 0.8261
## Neg Pred Value : 0.7692
## Prevalence : 0.6111
## Detection Rate : 0.5278
## Detection Prevalence : 0.6389
## Balanced Accuracy : 0.7890
##
## 'Positive' Class : PH
##
The rpart function from the rpart package can be used to grow a classification tree. The tree is built by splitting the data on the single variable which best splits the group in 2. Once the data are separated, this process is applied recursively to each sub-group until the subgroups reach a minimum size (defined as 3 below), or no further improvement can be made.
Variable selection is therefore carried out as part of the recursive partitioning itself, in the spirit of backward selection (recursive feature elimination, RFE).
minsplit
- minimum number of observations that must exist in a node in order for a split to be attempted
minbucket
- minimum number of observations in any terminal node
#train control
tc <- trainControl(method="repeatedcv", number=10, repeats = 10, classProbs=TRUE, summaryFunction = twoClassSummary, savePredictions = TRUE)
#maxcompete
set.seed(20)
fit.caret.rpart <- caret::train(group ~ ., data=ml.Spear, method='rpart', metric="ROC", trControl=tc, control=rpart.control(minsplit=2, minbucket=3, surrogatestyle = 1, maxcompete = 0))
fit.caret.rpart$bestTune
## cp
## 1 0
prp(fit.caret.rpart$finalModel, main="PH from HV Rpart model", extra=2, varlen=0)
plot(as.party(fit.caret.rpart$finalModel), main="PH from HV Rpart model", drop_terminal=F)
rpartmirs<-labels(fit.caret.rpart$finalModel)[-1]
rpartmirs<- gsub("<.*","", rpartmirs)
rpartmirs<- unique(gsub(">.*","", rpartmirs))
predrpart2 <- predict(fit.caret.rpart, setB2)
fit.preds.table.Rpart<- cbind(rownames(setB2),predrpart2,as.character(setB2$group))
fit.preds.table.Rpart<- as.data.frame(fit.preds.table.Rpart)
colnames(fit.preds.table.Rpart) <- c("sample","fit.preds","group")
RpartInterim<- confusionMatrix(predrpart2, setB2$group, positive = "PH")
RpartInterim
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 9 2
## PH 5 20
##
## Accuracy : 0.8056
## 95% CI : (0.6398, 0.9181)
## No Information Rate : 0.6111
## P-Value [Acc > NIR] : 0.01065
##
## Kappa : 0.5743
##
## Mcnemar's Test P-Value : 0.44969
##
## Sensitivity : 0.9091
## Specificity : 0.6429
## Pos Pred Value : 0.8000
## Neg Pred Value : 0.8182
## Prevalence : 0.6111
## Detection Rate : 0.5556
## Detection Prevalence : 0.6944
## Balanced Accuracy : 0.7760
##
## 'Positive' Class : PH
##
LASSO (Least Absolute Shrinkage and Selection Operator) is a feature selection method designed to reduce over-fitting. It automatically selects the significant variables by shrinking the coefficients of predictors deemed unimportant to zero.
Glmnet (see the package vignette) is a package that uses penalised maximum likelihood to fit a generalised linear model.
fit.glmcv <- cv.glmnet(x=as.matrix(ml.Spear[-1]), y=as.factor(ml.Spear$group), alpha=1, family='binomial', nfolds=10)
#summary(fit.glmcv)
other.glmcv <- cv.glmnet(x=as.matrix(ml.Spear[-1]), y=as.factor(ml.Spear$group), alpha=1, family='binomial', nfolds=10, type.measure = "class")
model.lambda <- fit.glmcv$lambda.min
plot(fit.glmcv, cex.axis=1, cex.lab=1,cex.main=1)
plot(other.glmcv, cex.axis=1, cex.lab=1, cex.main=1)
In the plot above, the red dots show the cross-validation curve, and the error bars show the upper and lower standard deviation curves.
The two dotted lines show lambda.min - the value of lambda giving the minimum mean cross-validated error - and lambda.1se, which gives the most regularised model such that the error is within 1 standard error of the minimum.
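A minimal sketch of how the two penalties, and the coefficients at the more conservative one, can be pulled from the cv.glmnet fit above (lambda.1se is shown for comparison only; the models below use lambda.min):
#both penalties are stored on the cv.glmnet object
fit.glmcv$lambda.min #lambda giving the minimum cross-validated error
fit.glmcv$lambda.1se #largest lambda within 1 standard error of that minimum
#non-zero coefficients at the more regularised penalty, for comparison
coef(fit.glmcv, s = "lambda.1se")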
#refit model for new lambda
paste("lambda value:", fit.glmcv$lambda.min)
## [1] "lambda value: 0.0416631184443247"
tuneLASSO<- expand.grid(.alpha = 1, .lambda = fit.glmcv$lambda.min)
LASSO.min<- caret::train(group ~ ., data = ml.Spear, method = "glmnet", trControl = control, family = 'binomial', tuneGrid = tuneLASSO)
lasso.model<- coef(LASSO.min$finalModel, LASSO.min$bestTune$lambda) %>% as.matrix() %>% as.data.frame() %>% rownames_to_column %>% filter(abs(`1`) >0)
ml.Spear.LASSO <- cbind("group" = ml.Spear$group, ml.Spear[,colnames(ml.Spear) %in% lasso.model$rowname])
paste("Number of miRs in LASSO model:",length(lasso.model$rowname)-1)
## [1] "Number of miRs in LASSO model: 13"
kable(lasso.model, caption = "miRs retained by LASSO") %>% kable_styling(full_width = TRUE)
rowname | Coefficient |
---|---|
(Intercept) | -15.0237033 |
let.7d.3p | 0.4949133 |
miR.1306.3p | 0.0077301 |
miR.148a.3p | 0.5795444 |
miR.187.5p | 0.1561147 |
miR.34a.5p | 2.2138448 |
miR.451a | -0.1104036 |
miR.4707.5p | 2.1832830 |
miR.484 | 0.9258305 |
miR.494 | -0.0658810 |
miR.548am.5p | 0.2649530 |
miR.572 | 0.4641874 |
miR.636 | -1.3620149 |
miR.671.5p | 0.0123772 |
val.LASSO<- setB[, colnames(setB) %in% colnames(ml.Spear)]
LASSO.pred.min <- predict(LASSO.min, newdata=val.LASSO[-1])
predicted.classes<-as.character(LASSO.pred.min)
fit.preds.table.LASSO<- as.data.frame(cbind(as.character(rownames(setB)),as.character(predicted.classes),as.character(setB2$group)))
colnames(fit.preds.table.LASSO) <- c("sample","LASSO","group")
LASSOInterim<- confusionMatrix(LASSO.pred.min,val.LASSO$group, positive = "PH")
LASSOInterim
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 9 5
## PH 5 17
##
## Accuracy : 0.7222
## 95% CI : (0.5481, 0.858)
## No Information Rate : 0.6111
## P-Value [Acc > NIR] : 0.1143
##
## Kappa : 0.4156
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.7727
## Specificity : 0.6429
## Pos Pred Value : 0.7727
## Neg Pred Value : 0.6429
## Prevalence : 0.6111
## Detection Rate : 0.4722
## Detection Prevalence : 0.6111
## Balanced Accuracy : 0.7078
##
## 'Positive' Class : PH
##
XGBoost (Extreme Gradient Boosting) is an optimised distributed gradient boosting library that generally performs better than the standard gradient boosting (GBM) framework alone.
nrounds
- maximum number of boosting iterations (similar to the no. of trees grown).
eta
- [range: (0,1)] - learning rate. After every round, it shrinks the feature weights to reach the best optimum. Lower eta = slower computation
gamma
- [range: [0, Inf)] - minimum loss reduction required to make a further split on a leaf node; larger values make the model more conservative
max_depth
- [range: [0, Inf)] - maximum depth of a tree; deeper trees are more complex and more prone to over-fitting
min_child_weight
- [range: [0, Inf)] - minimum sum of instance weight (hessian) required in a child node; larger values make the model more conservative
subsample
- [range: (0,1)] - controls the fraction of samples (rows) supplied to a tree
colsample_bytree
- [range: (0,1)] - controls the fraction of features (variables) supplied to a tree
Parameter | Default | Range | Values attempted | Final model |
---|---|---|---|---|
nrounds | 100 | | 100 - 10 000 | 200 |
eta | 0.3 | (0,1) | 0.01, 0.025, 0.05, 0.1, 0.2, 0.3 | 0.025 |
gamma | 0 | | 0, 0.05, 0.1, 0.5, 0.7, 0.9, 1 | 0.05 |
max_depth | 6 | | 1, 2, 3, 4, 5, 6 | 1 |
colsample_bytree | 1 | (0,1) | 0.4, 0.6, 0.8, 1.0 | 0.6 |
subsample | 1 | (0,1) | 0.5, 0.75, 1.0 | 0.5 |
min_child_weight | 1 | | 1, 2, 3, 4 | 1 |
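The hyperparameters are tuned in stages below rather than in one exhaustive grid; a minimal sketch (illustration only, never run here) of why, using the values attempted above:
#size of a single exhaustive grid over all values attempted above
full_grid <- expand.grid(
nrounds = seq(from = 100, to = 10000, by = 50),
eta = c(0.01, 0.025, 0.05, 0.1, 0.2, 0.3),
max_depth = c(1, 2, 3, 4, 5, 6),
gamma = c(0, 0.05, 0.1, 0.5, 0.7, 0.9, 1),
colsample_bytree = c(0.4, 0.6, 0.8, 1.0),
min_child_weight = c(1, 2, 3, 4),
subsample = c(0.5, 0.75, 1.0)
)
nrow(full_grid) #roughly 2.4 million candidate settings, each needing 100 resampled fits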
The default metric used by caret::train to determine the best settings is Accuracy, with Kappa also calculated for a classification model.
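If AUC were preferred as the tuning metric, a minimal sketch of the change (illustrative only; the XGBoost models below keep the Accuracy default):
#sketch: tune on ROC AUC instead of Accuracy
roc_control <- caret::trainControl(method = "repeatedcv", number = 10, repeats = 10,
classProbs = TRUE, summaryFunction = twoClassSummary)
#then call caret::train(..., metric = "ROC", trControl = roc_control)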
library(xgboost)
train<- ml.Spear[-1]
train<- data.matrix(train)
validation<- setB[-1]
validation<- data.matrix(validation)
labels<- ml.Spear$group
#Set up XGBoost with default hyperparameters
grid_default <- expand.grid(
nrounds = 100,
max_depth = 6,
eta = 0.3,
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1
)
train_control <- caret::trainControl(
method = "none",
verboseIter = FALSE, # no training log
allowParallel = TRUE
)
xgb_base<- caret::train(
x = train,
y = as.factor(labels),
trControl = train_control,
tuneGrid = grid_default,
method = "xgbTree",
verbose = TRUE,
scale_pos_weight = (72/43) #(Total no. samples / no. positive in groupA)
)
xgbpredict_base<- predict(xgb_base, validation)
confusionMatrix(xgbpredict_base, as.factor(setB$group), positive = "PH")
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 9 5
## PH 5 17
##
## Accuracy : 0.7222
## 95% CI : (0.5481, 0.858)
## No Information Rate : 0.6111
## P-Value [Acc > NIR] : 0.1143
##
## Kappa : 0.4156
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.7727
## Specificity : 0.6429
## Pos Pred Value : 0.7727
## Neg Pred Value : 0.6429
## Prevalence : 0.6111
## Detection Rate : 0.4722
## Detection Prevalence : 0.6111
## Balanced Accuracy : 0.7078
##
## 'Positive' Class : PH
##
nrounds<- seq(from = 100, to =1000, by = 50)
#tune using caret
tune_grid <- expand.grid(
#nrounds = seq(from = 100, to = 1000, by = 20),
nrounds = nrounds,
eta = 0.05,
max_depth = c(1, 2, 3),
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1
)
set.seed(25)
tune_control <- caret::trainControl(
method = "repeatedcv", # cross-validation
number = 10, # with n folds
repeats = 10, #the no. of complete sets of folds to compute
#index = createFolds(tr_treated$Id_clean), # fix the folds
verboseIter = FALSE, # no training log
allowParallel = TRUE
)
set.seed(25)
xgb_tune <- caret::train(
x = train,
y = as.factor(labels),
trControl = tune_control,
tuneGrid = tune_grid,
method = "xgbTree",
verbose = TRUE
#, scale_pos_weight = (35/21)- 1
)
kable(xgb_tune$bestTune, caption = "1st tuning, best parameters")%>% kable_styling(full_width = TRUE)
  | nrounds | max_depth | eta | gamma | colsample_bytree | min_child_weight | subsample |
---|---|---|---|---|---|---|---|
17 | 900 | 1 | 0.05 | 0 | 1 | 1 | 1 |
tune_grid2 <- expand.grid(
nrounds = nrounds,
eta = 0.05,
max_depth = c(xgb_tune$bestTune$max_depth -1, xgb_tune$bestTune$max_depth, xgb_tune$bestTune$max_depth +1),
gamma = 0,
colsample_bytree = 1,
min_child_weight = c(0,1,2,3,4),
subsample = 1
)
set.seed(25)
xgb_tune2 <- caret::train(
x = train,
y = as.factor(labels),
trControl = tune_control,
tuneGrid = tune_grid2,
method = "xgbTree",
verbose = TRUE
#, scale_pos_weight = (35/21) -1
)
kable(xgb_tune2$bestTune, caption = "2nd tuning, best parameters")%>% kable_styling(full_width = TRUE)
  | nrounds | max_depth | eta | gamma | colsample_bytree | min_child_weight | subsample |
---|---|---|---|---|---|---|---|
142 | 500 | 1 | 0.05 | 0 | 1 | 2 | 1 |
tune_grid3 <- expand.grid(
nrounds = nrounds,
eta = 0.05,
max_depth = xgb_tune2$bestTune$max_depth,
gamma = 0,
colsample_bytree = c(0.4, 0.6, 0.8, 1.0),
min_child_weight = xgb_tune2$bestTune$min_child_weight,
subsample = c(0.5, 0.75, 1.0)
)
set.seed(25)
xgb_tune3 <- caret::train(
x = train,
y = as.factor(labels),
trControl = tune_control,
tuneGrid = tune_grid3,
method = "xgbTree",
verbose = TRUE
#, scale_pos_weight = (35/21) -1
)
kable(xgb_tune3$bestTune, caption = "3rd tuning, best parameters")%>% kable_styling(full_width = TRUE)
  | nrounds | max_depth | eta | gamma | colsample_bytree | min_child_weight | subsample |
---|---|---|---|---|---|---|---|
12 | 650 | 1 | 0.05 | 0 | 0.4 | 2 | 0.5 |
tune_grid4 <- expand.grid(
nrounds = nrounds,
eta = 0.05,
max_depth = xgb_tune3$bestTune$max_depth,
gamma = c(0,0.05, 0.1, 0.5, 0.7, 0.9, 1),
colsample_bytree = xgb_tune3$bestTune$colsample_bytree,
min_child_weight = xgb_tune2$bestTune$min_child_weight,
subsample = xgb_tune3$bestTune$subsample
)
set.seed(25)
xgb_tune4 <- caret::train(
x = train,
y = as.factor(labels),
trControl = tune_control,
tuneGrid = tune_grid4,
method = "xgbTree",
verbose = TRUE
#, scale_pos_weight = (35/21) -1
)
kable(xgb_tune4$bestTune, caption = "4th tuning, best parameters")%>% kable_styling(full_width = TRUE)
  | nrounds | max_depth | eta | gamma | colsample_bytree | min_child_weight | subsample |
---|---|---|---|---|---|---|---|
28 | 500 | 1 | 0.05 | 0.05 | 0.4 | 2 | 0.5 |
tune_grid5 <- expand.grid(
nrounds = seq(from = 100, to = 10000, by = 50),
eta = c(0.01,0.025,0.05,0.1),
max_depth = xgb_tune3$bestTune$max_depth,
gamma = xgb_tune4$bestTune$gamma,
colsample_bytree = xgb_tune3$bestTune$colsample_bytree,
min_child_weight = xgb_tune2$bestTune$min_child_weight,
subsample = xgb_tune3$bestTune$subsample
)
set.seed(25)
xgb_tune5 <- caret::train(
x = train,
y = as.factor(labels),
trControl = tune_control,
tuneGrid = tune_grid5,
method = "xgbTree",
verbose = TRUE
#, scale_pos_weight = (35/21) -1
)
kable(xgb_tune5$bestTune, caption = "5th tuning, best parameters")%>% kable_styling(full_width = TRUE)
  | nrounds | max_depth | eta | gamma | colsample_bytree | min_child_weight | subsample |
---|---|---|---|---|---|---|---|
284 | 4300 | 1 | 0.025 | 0.05 | 0.4 | 2 | 0.5 |
#look at features from final model
xgblabels <- ifelse(labels == "PH", 1,0)
xgb_final<- xgboost(data = train, label = xgblabels, nrounds = xgb_tune5$bestTune$nrounds, objective = "binary:logistic", max_depth = xgb_tune5$bestTune$max_depth, eta = xgb_tune5$bestTune$eta, min_child_weight = xgb_tune5$bestTune$min_child_weight, verbose = 0)
importance_final<- xgb.importance(feature_names = colnames(train), model = xgb_final)
xgb_mirs<- importance_final[importance_final$Gain>0.05,]
kable(xgb_mirs, caption = "Important Features (Gain > 0.05)")%>% kable_styling(full_width = TRUE)
Feature | Gain | Cover | Frequency |
---|---|---|---|
miR.636 | 0.1645373 | 0.1020921 | 0.0912052 |
miR.187.5p | 0.1396281 | 0.0728265 | 0.0464169 |
let.7d.3p | 0.1206077 | 0.1114459 | 0.0960912 |
miR.3613.3p | 0.1008889 | 0.0771543 | 0.0602606 |
miR.4707.5p | 0.0825629 | 0.0396843 | 0.0203583 |
miR.18b.5p | 0.0760481 | 0.0960875 | 0.0944625 |
miR.572 | 0.0706146 | 0.0634856 | 0.0570033 |
miR.125a.5p | 0.0616937 | 0.0762966 | 0.0814332 |
gg<- xgb.ggplot.importance(importance_matrix = xgb_mirs)
gg+ggplot2::theme(legend.position = "none", text = element_text(size = 22), axis.text = element_text(size = 18))
xgbpredictfinal<- predict(xgb_tune5, validation)
xgbpredictfinal_probs<- predict(xgb_tune5, validation, type = "prob")
confusionMatrix(xgbpredictfinal, as.factor(setB$group), positive = "PH")
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 9 4
## PH 5 18
##
## Accuracy : 0.75
## 95% CI : (0.578, 0.8788)
## No Information Rate : 0.6111
## P-Value [Acc > NIR] : 0.05907
##
## Kappa : 0.4671
##
## Mcnemar's Test P-Value : 1.00000
##
## Sensitivity : 0.8182
## Specificity : 0.6429
## Pos Pred Value : 0.7826
## Neg Pred Value : 0.6923
## Prevalence : 0.6111
## Detection Rate : 0.5000
## Detection Prevalence : 0.6389
## Balanced Accuracy : 0.7305
##
## 'Positive' Class : PH
##
setAshort<- ml.Spear[,c("group",xgb_mirs$Feature)]
setBshort<- setB[,c("group",xgb_mirs$Feature)]
train.short<- data.matrix(setAshort[-1])
validation.short<- data.matrix(setBshort[-1])
labels<- setAshort$group
setBshort$group <- setB$group
#Set up XGBoost with default hyperparameters
grid_default <- expand.grid(
nrounds = 100,
max_depth = 6,
eta = 0.3,
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1
)
train_control <- caret::trainControl(
method = "none",
verboseIter = FALSE, # no training log
allowParallel = TRUE
)
xgb_base_short<- caret::train(
x = train.short,
y = as.factor(labels),
trControl = train_control,
tuneGrid = grid_default,
method = "xgbTree",
verbose = TRUE
)
xgbpredict_base_short<- predict(xgb_base_short, validation.short)
confusionMatrix(xgbpredict_base_short, as.factor(setBshort$group), positive = "PH")
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 7 6
## PH 7 16
##
## Accuracy : 0.6389
## 95% CI : (0.4622, 0.7918)
## No Information Rate : 0.6111
## P-Value [Acc > NIR] : 0.4372
##
## Kappa : 0.2303
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.7273
## Specificity : 0.5000
## Pos Pred Value : 0.6957
## Neg Pred Value : 0.5385
## Prevalence : 0.6111
## Detection Rate : 0.4444
## Detection Prevalence : 0.6389
## Balanced Accuracy : 0.6136
##
## 'Positive' Class : PH
##
Check model accuracy for different tree depths and learning rates, for nrounds between 100 and 1000:
nrounds<- seq(from = 100, to =1000, by = 50)
#tune using caret
tune_grid <- expand.grid(
#nrounds = seq(from = 100, to = 1000, by = 50),
nrounds = nrounds,
eta = c(0.025, 0.05, 0.1, 0.2, 0.3),
max_depth = c(1, 2, 3, 4, 5, 6),
gamma = 0,
colsample_bytree = 1,
min_child_weight = 1,
subsample = 1
)
set.seed(25)
tune_control <- caret::trainControl(
method = "repeatedcv", # cross-validation
number = 10, # with n folds
repeats = 10, #the no. of complete sets of folds to compute
#index = createFolds(tr_treated$Id_clean), # fix the folds
verboseIter = FALSE, # no training log
allowParallel = TRUE,
savePredictions = TRUE
)
set.seed(25)
xgb_tune_short <- caret::train(
x = train.short,
y = as.factor(labels),
trControl = tune_control,
tuneGrid = tune_grid,
method = "xgbTree",
verbose = TRUE
)
kable(xgb_tune_short$bestTune, caption = "1st tuning, best parameters")%>% kable_styling(full_width = TRUE)
  | nrounds | max_depth | eta | gamma | colsample_bytree | min_child_weight | subsample |
---|---|---|---|---|---|---|---|
10 | 550 | 1 | 0.025 | 0 | 1 | 1 | 1 |
tune_grid2_short <- expand.grid(
nrounds = nrounds,
eta = xgb_tune_short$bestTune$eta,
max_depth = c(xgb_tune_short$bestTune$max_depth -1, xgb_tune_short$bestTune$max_depth, xgb_tune_short$bestTune$max_depth +1),
gamma = 0,
colsample_bytree = 1,
min_child_weight = c(1,2,3,4),
subsample = 1
)
set.seed(25)
xgb_tune2_short <- caret::train(
x = train.short,
y = as.factor(labels),
trControl = tune_control,
tuneGrid = tune_grid2_short,
method = "xgbTree",
verbose = TRUE
)
kable(xgb_tune2_short$bestTune, caption = "2nd tuning, best parameters")%>% kable_styling(full_width = TRUE)
  | nrounds | max_depth | eta | gamma | colsample_bytree | min_child_weight | subsample |
---|---|---|---|---|---|---|---|
86 | 550 | 1 | 0.025 | 0 | 1 | 1 | 1 |
Minimum Sum of Instance Weight refers to the min_child_weight parameter.
tune_grid3_short <- expand.grid(
nrounds = nrounds,
eta = xgb_tune_short$bestTune$eta,
max_depth = xgb_tune2_short$bestTune$max_depth,
gamma = 0,
colsample_bytree = c(0.4, 0.6, 0.8, 1.0),
min_child_weight = xgb_tune2_short$bestTune$min_child_weight,
subsample = c(0.5, 0.75, 1.0)
)
set.seed(25)
xgb_tune3_short <- caret::train(
x = train.short,
y = as.factor(labels),
trControl = tune_control,
tuneGrid = tune_grid3_short,
method = "xgbTree",
verbose = TRUE
)
kable(xgb_tune3_short$bestTune, caption = "3rd tuning, best parameters")%>% kable_styling(full_width = TRUE)
  | nrounds | max_depth | eta | gamma | colsample_bytree | min_child_weight | subsample |
---|---|---|---|---|---|---|---|
13 | 700 | 1 | 0.025 | 0 | 0.4 | 1 | 0.5 |
tune_grid4_short <- expand.grid(
nrounds = nrounds,
eta = xgb_tune_short$bestTune$eta,
max_depth = xgb_tune3_short$bestTune$max_depth,
gamma = c(0,0.05, 0.1, 0.5, 0.7, 0.9, 1),
colsample_bytree = xgb_tune3_short$bestTune$colsample_bytree,
min_child_weight = xgb_tune2_short$bestTune$min_child_weight,
subsample = xgb_tune3_short$bestTune$subsample
)
set.seed(25)
xgb_tune4_short <- caret::train(
x = train.short,
y = as.factor(labels),
trControl = tune_control,
tuneGrid = tune_grid4_short,
method = "xgbTree",
verbose = TRUE
)
kable(xgb_tune4_short$bestTune, caption = "4th tuning, best parameters")%>% kable_styling(full_width = TRUE)
  | nrounds | max_depth | eta | gamma | colsample_bytree | min_child_weight | subsample |
---|---|---|---|---|---|---|---|
119 | 300 | 1 | 0.025 | 1 | 0.4 | 1 | 0.5 |
tune_grid5_short <- expand.grid(
nrounds = seq(from = 100, to = 10000, by = 50),
eta = c(0.01,0.025,0.05,0.1),
max_depth = xgb_tune3_short$bestTune$max_depth,
gamma = xgb_tune4_short$bestTune$gamma,
colsample_bytree = xgb_tune3_short$bestTune$colsample_bytree,
min_child_weight = xgb_tune2_short$bestTune$min_child_weight,
subsample = xgb_tune3_short$bestTune$subsample
)
set.seed(25)
xgb_tune5_short <- caret::train(
x = train.short,
y = as.factor(labels),
trControl = tune_control,
tuneGrid = tune_grid5_short,
method = "xgbTree",
verbose = TRUE
)
kable(xgb_tune5_short$bestTune, caption = "5th tuning, best parameters")%>% kable_styling(full_width = TRUE)
  | nrounds | max_depth | eta | gamma | colsample_bytree | min_child_weight | subsample |
---|---|---|---|---|---|---|---|
202 | 200 | 1 | 0.025 | 1 | 0.4 | 1 | 0.5 |
final_grid_short <- expand.grid(
nrounds = xgb_tune5_short$bestTune$nrounds,
eta = xgb_tune5_short$bestTune$eta,
max_depth = xgb_tune5_short$bestTune$max_depth,
gamma = xgb_tune5_short$bestTune$gamma,
colsample_bytree = xgb_tune5_short$bestTune$colsample_bytree,
min_child_weight = xgb_tune5_short$bestTune$min_child_weight,
subsample = xgb_tune5_short$bestTune$subsample
)
set.seed(25)
tune_control_f <- caret::trainControl(
method = "repeatedcv", # cross-validation
number = 10, # with n folds
repeats = 10, #the no. of complete sets of folds to compute
#index = createFolds(tr_treated$Id_clean), # fix the folds
verboseIter = FALSE, # no training log
allowParallel = TRUE,
savePredictions = TRUE,
classProbs = TRUE
)
set.seed(25)
xgb_tune_final_short <- caret::train(
x = train.short,
y = as.factor(labels),
trControl = tune_control_f,
tuneGrid = final_grid_short,
method = "xgbTree",
verbose = TRUE
)
xgbpredictfinalshort<- predict(xgb_tune_final_short, validation.short)
XGBInterim<- confusionMatrix(xgbpredictfinalshort, as.factor(setBshort$group), positive = "PH")
XGBInterim
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 10 2
## PH 4 20
##
## Accuracy : 0.8333
## 95% CI : (0.6719, 0.9363)
## No Information Rate : 0.6111
## P-Value [Acc > NIR] : 0.003604
##
## Kappa : 0.64
##
## Mcnemar's Test P-Value : 0.683091
##
## Sensitivity : 0.9091
## Specificity : 0.7143
## Pos Pred Value : 0.8333
## Neg Pred Value : 0.8333
## Prevalence : 0.6111
## Detection Rate : 0.5556
## Detection Prevalence : 0.6667
## Balanced Accuracy : 0.8117
##
## 'Positive' Class : PH
##
#mir in any feature selection method
all.mirs<- unique(c(lasso.model$rowname[-1],multi.boruta.mirs, as.character(rpartmirs), xgb_mirs$Feature))
#miRs in at least 2 selection methods
mirnames<- duplicated(c(lasso.model$rowname[-1],multi.boruta.mirs, rpartmirs, xgb_mirs$Feature))
mirnames<- c(lasso.model$rowname[-1],multi.boruta.mirs, rpartmirs, xgb_mirs$Feature)[mirnames] %>% unique(.)
miRNAs in any feature selection method:
all.mirs
miRNAs in at least 2 feature selection methods:
mirnames
ROC curves for all microRNAs selected by either LASSO, Rpart, Boruta or XGBoost. NB these are calculated across all HV and PAH samples, not just the training set.
#ROC curves for IPAH
library(pROC)
roc.res.i <- list()
for(i in all.mirs){
#Save ROC data
roc.res.i[[i]] <- roc(dataset$group, dataset[,i], plot=F)
plot.roc(roc.res.i[[i]], lwd = 5, cex.axis = 1, cex.lab = 1, cex.main = 1, main=paste("ROC for",i))
}
cutoffs <- find.Cutpoints.Loop(all.mirs, data=ml.Spear[-1], pheno=ml.Spear[,1], healthytag="PH", method='MaxSpSe')
kable(cutoffs, caption = "Cutpoints for miRNAs selected by the above methods, calculated on the training set") %>% kable_styling(full_width = TRUE)
mir | cutpoint | direction | sensitivity | specificity | auc |
---|---|---|---|---|---|
let.7d.3p | 3.085032 | > | 0.6551724 | 0.6428571 | 0.663 |
miR.1306.3p | 2.788380 | > | 0.6206897 | 0.6190476 | 0.65 |
miR.148a.3p | 2.686263 | > | 0.6551724 | 0.6428571 | 0.668 |
miR.187.5p | 2.555543 | > | 0.7241379 | 0.7142857 | 0.782 |
miR.34a.5p | 2.792594 | > | 0.5862069 | 0.5952381 | 0.621 |
miR.451a | 11.518547 | < | 0.6206897 | 0.6428571 | 0.687 |
miR.4707.5p | 2.642439 | > | 0.6551724 | 0.6428571 | 0.66 |
miR.484 | 3.057025 | > | 0.5517241 | 0.5476190 | 0.584 |
miR.494 | 3.870084 | < | 0.6206897 | 0.5238095 | 0.571 |
miR.548am.5p | 2.880562 | > | 0.5517241 | 0.5714286 | 0.541 |
miR.572 | 4.507390 | > | 0.6206897 | 0.5952381 | 0.678 |
miR.636 | 3.408426 | < | 0.7241379 | 0.7380952 | 0.8 |
miR.671.5p | 3.092697 | > | 0.6206897 | 0.6190476 | 0.67 |
miR.18b.5p | 2.627057 | < | 0.6896552 | 0.6190476 | 0.567 |
miR.3613.3p | 3.705675 | < | 0.8275862 | 0.6666667 | 0.746 |
miR.652.3p | 2.537779 | > | 0.6896552 | 0.6904762 | 0.706 |
miR.933 | 2.987387 | < | 0.7586207 | 0.6904762 | 0.737 |
miR.151a.3p | 2.639110 | > | 0.6551724 | 0.6428571 | 0.667 |
miR.1246 | 3.706243 | > | 0.5172414 | 0.5000000 | 0.522 |
miR.125a.5p | 2.687269 | > | 0.6896552 | 0.6904762 | 0.679 |
miR-187-5p
m187cut <- cutoffs %>% filter(mir == "miR.187.5p")
m187tab<- table(ifelse(setB$miR.187.5p > m187cut$cutpoint, "PH", "HV"), setB$group)
(m187tab[1]+m187tab[4])/sum(m187tab)
## [1] 0.7777778
miR-636
m636cut <- cutoffs %>% filter(mir == "miR.636")
m636tab<- table(ifelse(setB$miR.636 < m636cut$cutpoint, "PH","HV"), setB$group)
(m636tab[1]+m636tab[4])/sum(m636tab)
## [1] 0.6944444
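The same accuracy-from-cutpoint calculation can be wrapped in a small helper; a sketch only (cutpoint_accuracy is not part of the original analysis), assuming the cutoffs table and setB data frame built above:
#hypothetical helper: apply a single-miR cutpoint to set B and return accuracy
cutpoint_accuracy <- function(mir){
cut <- cutoffs[cutoffs$mir == mir, ]
call.PH <- if (cut$direction == ">") setB[[mir]] > cut$cutpoint else setB[[mir]] < cut$cutpoint
tab <- table(ifelse(call.PH, "PH", "HV"), setB$group)
sum(diag(tab)) / sum(tab)
}
cutpoint_accuracy("miR.187.5p")
cutpoint_accuracy("miR.636")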
compared<- dataset[,c("group", all.mirs)]
sigs<- list()
for(i in 2:(length(compared))) {
sigs[[i]] <- wilcox.test(
compared[compared$group == "HV",i],
compared[compared$group == "PH",i], alternative = "two.sided"
)
}
names(sigs)<- colnames(compared)
sigs <- sigs[-1]
df<- data.frame(mir.name=character(),p.value=double() ,stringsAsFactors = FALSE)
for(i in 1:length(sigs)) {
df[i,] <- c(names(sigs[i]),sigs[[i]]$p.value)
}
df$p.adj.BH<- p.adjust(df$p.value, method = "BH")
rownames(df)<- df$mir.name
kable(df[-1], caption = "Wilcox tests, adjusted p-value BH method, miRs selected by feature selection") %>% kable_styling(full_width = TRUE)
  | p.value | p.adj.BH |
---|---|---|
let.7d.3p | 0.421515639091875 | 0.4437007 |
miR.1306.3p | 0.0170342695582462 | 0.0283904 |
miR.148a.3p | 0.0242887381217238 | 0.0373673 |
miR.187.5p | 9.59281571025942e-08 | 0.0000019 |
miR.34a.5p | 0.115801851075843 | 0.1415116 |
miR.451a | 0.00635005507654273 | 0.0127001 |
miR.4707.5p | 0.0022645387494186 | 0.0057824 |
miR.484 | 0.0545909875121561 | 0.0779871 |
miR.494 | 0.351917353704318 | 0.3910193 |
miR.548am.5p | 0.946806105179864 | 0.9468061 |
miR.572 | 0.00231295892090655 | 0.0057824 |
miR.636 | 2.16919244720858e-06 | 0.0000217 |
miR.671.5p | 0.000261797038495134 | 0.0013090 |
miR.18b.5p | 0.0928287163659173 | 0.1237716 |
miR.3613.3p | 2.1595648726422e-05 | 0.0001440 |
miR.652.3p | 0.00178991207196812 | 0.0057824 |
miR.933 | 0.00164133147224172 | 0.0057824 |
miR.151a.3p | 0.00599323424402427 | 0.0127001 |
miR.1246 | 0.120284827480817 | 0.1415116 |
miR.125a.5p | 0.0135653887117804 | 0.0246643 |
Summary statistics for PH group:
#pick mirs selected by any of the feature selection methods
selectedmirs <- c("group",all.mirs)
#reduce dataframe to contain only mirs in selectedmirs
selecmirs<- ml.Spear[, colnames(ml.Spear) %in% selectedmirs]
#melt dataframe so ggplot can be used
df<- melt(selecmirs, id.var = "group") %>% mutate(group = factor(group))
#print summaries of PH and HV groups
H<- selecmirs[selecmirs$group == "HV",]
PH<- selecmirs[selecmirs$group == "PH",]
summary(PH)
## group let.7d.3p miR.1246 miR.125a.5p miR.1306.3p
## HV: 0 Min. :2.726 Min. :2.595 Min. :2.608 Min. :2.585
## PH:42 1st Qu.:3.035 1st Qu.:3.079 1st Qu.:2.675 1st Qu.:2.727
## Median :3.231 Median :3.704 Median :2.712 Median :2.858
## Mean :3.440 Mean :3.938 Mean :2.879 Mean :2.985
## 3rd Qu.:3.521 3rd Qu.:4.564 3rd Qu.:2.895 3rd Qu.:3.085
## Max. :7.135 Max. :6.123 Max. :5.046 Max. :4.329
## miR.148a.3p miR.151a.3p miR.187.5p miR.18b.5p
## Min. :2.581 Min. :2.577 Min. :2.473 Min. :2.576
## 1st Qu.:2.662 1st Qu.:2.632 1st Qu.:2.545 1st Qu.:2.601
## Median :2.710 Median :2.653 Median :2.695 Median :2.618
## Mean :2.831 Mean :2.810 Mean :2.829 Mean :2.635
## 3rd Qu.:2.903 3rd Qu.:2.829 3rd Qu.:2.833 3rd Qu.:2.644
## Max. :3.771 Max. :4.406 Max. :5.340 Max. :2.787
## miR.34a.5p miR.3613.3p miR.451a miR.4707.5p
## Min. :2.682 Min. :2.882 Min. : 6.925 Min. :2.499
## 1st Qu.:2.754 1st Qu.:3.102 1st Qu.: 9.349 1st Qu.:2.571
## Median :2.808 Median :3.517 Median :10.379 Median :2.737
## Mean :2.895 Mean :3.828 Mean :10.509 Mean :2.869
## 3rd Qu.:2.951 3rd Qu.:4.082 3rd Qu.:11.861 3rd Qu.:3.073
## Max. :3.478 Max. :8.708 Max. :14.237 Max. :3.648
## miR.484 miR.494 miR.548am.5p miR.572
## Min. :2.707 Min. :2.642 Min. :2.688 Min. :2.541
## 1st Qu.:2.995 1st Qu.:2.945 1st Qu.:2.807 1st Qu.:4.199
## Median :3.079 Median :3.777 Median :2.884 Median :4.743
## Mean :3.113 Mean :3.993 Mean :2.997 Mean :4.673
## 3rd Qu.:3.185 3rd Qu.:4.544 3rd Qu.:3.038 3rd Qu.:5.231
## Max. :3.821 Max. :8.154 Max. :4.141 Max. :7.474
## miR.636 miR.652.3p miR.671.5p miR.933
## Min. :2.721 Min. :2.498 Min. :2.492 Min. :2.601
## 1st Qu.:3.006 1st Qu.:2.533 1st Qu.:2.919 1st Qu.:2.832
## Median :3.221 Median :2.564 Median :3.399 Median :2.938
## Mean :3.271 Mean :2.629 Mean :4.557 Mean :2.966
## 3rd Qu.:3.407 3rd Qu.:2.631 3rd Qu.:6.167 3rd Qu.:3.016
## Max. :4.954 Max. :3.596 Max. :9.917 Max. :3.670
Summary statistics for healthy volunteers:
summary(H)
## group let.7d.3p miR.1246 miR.125a.5p miR.1306.3p
## HV:29 Min. :2.752 Min. : 2.649 Min. :2.626 Min. :2.581
## PH: 0 1st Qu.:2.964 1st Qu.: 3.060 1st Qu.:2.668 1st Qu.:2.676
## Median :3.058 Median : 3.706 Median :2.678 Median :2.758
## Mean :3.132 Mean : 4.067 Mean :2.714 Mean :2.806
## 3rd Qu.:3.136 3rd Qu.: 4.475 3rd Qu.:2.702 3rd Qu.:2.825
## Max. :4.537 Max. :13.529 Max. :3.657 Max. :3.545
## miR.148a.3p miR.151a.3p miR.187.5p miR.18b.5p
## Min. :2.612 Min. :2.593 Min. :2.453 Min. :2.586
## 1st Qu.:2.650 1st Qu.:2.620 1st Qu.:2.496 1st Qu.:2.607
## Median :2.681 Median :2.628 Median :2.534 Median :2.632
## Mean :2.691 Mean :2.657 Mean :2.558 Mean :2.631
## 3rd Qu.:2.696 3rd Qu.:2.649 3rd Qu.:2.576 3rd Qu.:2.651
## Max. :2.998 Max. :3.215 Max. :3.024 Max. :2.684
## miR.34a.5p miR.3613.3p miR.451a miR.4707.5p
## Min. :2.649 Min. :3.455 Min. : 7.365 Min. :2.482
## 1st Qu.:2.751 1st Qu.:3.752 1st Qu.:10.870 1st Qu.:2.583
## Median :2.791 Median :4.042 Median :12.053 Median :2.631
## Mean :2.798 Mean :4.394 Mean :11.645 Mean :2.638
## 3rd Qu.:2.835 3rd Qu.:4.970 3rd Qu.:12.649 3rd Qu.:2.680
## Max. :3.087 Max. :6.831 Max. :14.218 Max. :2.836
## miR.484 miR.494 miR.548am.5p miR.572
## Min. :2.765 Min. :2.637 Min. :2.701 Min. :2.620
## 1st Qu.:2.917 1st Qu.:3.421 1st Qu.:2.818 1st Qu.:3.798
## Median :3.047 Median :3.942 Median :2.865 Median :4.372
## Mean :3.067 Mean :4.426 Mean :2.908 Mean :4.203
## 3rd Qu.:3.131 3rd Qu.:5.366 3rd Qu.:2.933 3rd Qu.:4.681
## Max. :3.849 Max. :8.353 Max. :3.563 Max. :5.936
## miR.636 miR.652.3p miR.671.5p miR.933
## Min. :2.737 Min. :2.506 Min. :2.538 Min. :2.796
## 1st Qu.:3.349 1st Qu.:2.523 1st Qu.:2.638 1st Qu.:2.987
## Median :3.825 Median :2.534 Median :2.852 Median :3.056
## Mean :3.950 Mean :2.543 Mean :3.699 Mean :3.143
## 3rd Qu.:4.382 3rd Qu.:2.542 3rd Qu.:3.756 3rd Qu.:3.245
## Max. :5.509 Max. :2.737 Max. :9.023 Max. :3.800
ggplot(data = df, aes(x= variable, y=value)) + geom_boxplot(aes(fill=group)) + theme(text = element_text(size=12), axis.text.x=element_text(angle=90, vjust=0.5, size = 12),axis.text.y=element_text(size = 12)) + labs(x="", y="Expression", size = 12)
#melt dataframe into SSc, SSc-no-ph, IPAH, HV
selectedmirs <- c("PHstatus",all.mirs)
#reduce dataset to contain only mirs in selectedmirs - all subjects not just set A
selecmirs2<- dataset[,which(colnames(dataset) %in% selectedmirs)]
dfSSc<- melt(selecmirs2, id.var = "PHstatus")
dfSSc$PHstatus<- factor(dfSSc$PHstatus, levels = c("HV","No_PH_CTD","CTD_PAH","IPAH"))
dfSScPH<- dfSSc[dfSSc$PHstatus == 'CTD_PAH' | dfSSc$PHstatus == 'IPAH', ]
ggplot(data = dfSScPH, aes(x= variable, y=value)) + geom_boxplot(aes(fill=PHstatus)) + theme(axis.text.x=element_text(angle=90, vjust=0.5, size = 12),axis.text.y=element_text(size = 12)) + labs(x="", y="Expression")
ggplot(data = dfSSc, aes(x= variable, y=value)) + geom_boxplot(aes(fill=PHstatus)) + theme(axis.text.x=element_text(angle=90, vjust=0.5, size = 12),axis.text.y=element_text(size = 12)) + labs(x="", y="Expression")
#melt dataframe into SSc, SSc-no-ph, IPAH, HV
mormirs <- c("group",unique(mirnames))
#reduce dataset to contain only mirs in mormirs
moremirs<- dataset[,mormirs]
moremirs<- melt(moremirs, id.var = "group")
ggplot(data = moremirs, aes(x= variable, y=value)) + geom_boxplot(aes(fill=group)) + theme(axis.text.x=element_text(angle=90, vjust=0.5, size = 12),axis.text.y=element_text(size = 12),axis.title.y=element_text(size=12)) + labs(x="", y="Expression")
moremirsAB<- dataset[,which(colnames(dataset) %in% c(unique(mirnames), "AB"))]
AB<- moremirsAB %>% melt(id.var = "AB")
ggplot(data = AB, aes(x= variable, y=value)) + geom_boxplot(aes(fill=AB)) + theme(axis.text.x=element_text(angle=90, vjust=0.5, size = 12),axis.text.y=element_text(size = 12)) + labs(x="", y="Expression")
final2<- moremirs[moremirs$variable =="miR.636" | moremirs$variable =="miR.187.5p",] #column names use dots, not hyphens
#ggplot(data = final3, aes(x= variable, y=value)) + geom_boxplot(aes(fill=group),size=1.2) + theme(axis.text.x=element_text(angle=60, vjust=0.5, size = 16),axis.text.y=element_text(size = 12),axis.title.y=element_text(size=12), legend.text = element_text(size = 12)) + labs(x="", y="Expression")
boruta.split<- split(fit.Boruta$pred, fit.Boruta$pred$Resample)
parallel::mclapply(1:100, function(d) {
pROC::auc(pROC::roc(predictor = boruta.split[[d]]$PH, response = boruta.split[[d]]$obs))[1]
}) %>% unlist() %>% hist(main = paste("Boruta CV mean AUC:", round(mean(.),2)))
rpart.auc<- list()
rpart.split<- split(fit.caret.rpart$pred, fit.caret.rpart$pred$Resample)
parallel::mclapply(1:100, function(d) {
pROC::auc(pROC::roc(predictor = rpart.split[[d]]$PH, response = rpart.split[[d]]$obs))[1]
}) %>% unlist() %>% hist(main = paste("Rpart CV mean AUC:", round(mean(.),2)))
LASSO.split.min<- split(LASSO.min$pred, LASSO.min$pred$Resample)
parallel::mclapply(1:100, function(d) {
pROC::auc(pROC::roc(predictor = LASSO.split.min[[d]]$PH, response = LASSO.split.min[[d]]$obs))[1]
}) %>% unlist() %>% hist(main = paste("LASSO CV mean AUC:", round(mean(.),2)))
tosplit<- xgb_tune_final_short
XGB.split<- split(tosplit$pred, tosplit$pred$Resample)
parallel::mclapply(1:100, function(d) {
pROC::auc(pROC::roc(predictor = XGB.split[[d]]$PH, response = XGB.split[[d]]$obs))[1]
}) %>% unlist() %>% hist(main = paste("XGBoost CV mean AUC:", round(mean(.),2)))
Boruta.perf<- predict(fit.Boruta, tree.Boruta[-1], type= "prob")
Boruta.pred<- prediction(Boruta.perf$PH, tree.Boruta$group)
Boruta.perfs<- ROCR::performance(Boruta.pred,"tpr","fpr")
Boruta.sens<- ROCR::performance(Boruta.pred,"sens","spec")
Boruta.auc <- pROC::auc(tree.Boruta$group, Boruta.perf[,2])
## Setting levels: control = HV, case = PH
## Setting direction: controls < cases
Boruta.aucCI<- pROC::ci.auc(tree.Boruta$group, Boruta.perf[,2])
## Setting levels: control = HV, case = PH
## Setting direction: controls < cases
par(mfrow=c(1,2))
plot(Boruta.perfs, main = paste("AUC:", round(Boruta.auc,2)))
plot(Boruta.sens)
Boruta.aucCI
## 95% CI: 0.6902-0.9981 (DeLong)
rpart.perf<- predict(fit.caret.rpart, setB2[-1], type= "prob")
rpart.pred<- prediction(rpart.perf$PH, setB2$group)
rpart.perfs<- ROCR::performance(rpart.pred,"tpr","fpr")
rpart.sens<- ROCR::performance(rpart.pred,"sens","spec")
rpart.auc<- pROC::auc(setB2$group, rpart.perf[,2])
## Setting levels: control = HV, case = PH
## Setting direction: controls < cases
rpart.aucCI<- pROC::ci.auc(setB2$group, rpart.perf[,2])
## Setting levels: control = HV, case = PH
## Setting direction: controls < cases
par(mfrow=c(1,2))
plot(rpart.perfs, main = paste("AUC:", round(rpart.auc,2)))
plot(rpart.sens)
rpart.aucCI
## 95% CI: 0.6321-0.9458 (DeLong)
LASSO.probs.min <- predict(LASSO.min, newdata=val.LASSO[-1], type = "prob")
LASSO.pred.min<- prediction(LASSO.probs.min$PH, val.LASSO$group)
LASSO.perfs.min<- ROCR::performance(LASSO.pred.min,"tpr","fpr")
LASSO.sens.min<- ROCR::performance(LASSO.pred.min,"sens","spec")
LASSO.auc.min<- pROC::auc(val.LASSO$group, LASSO.probs.min[,2])
## Setting levels: control = HV, case = PH
## Setting direction: controls < cases
LASSO.aucCI<- pROC::ci.auc(val.LASSO$group, LASSO.probs.min[,2])
## Setting levels: control = HV, case = PH
## Setting direction: controls < cases
par(mfrow=c(1,2))
plot(LASSO.perfs.min, main = paste("AUC:", round(LASSO.auc.min,2)))
plot(LASSO.sens.min)
LASSO.aucCI
## 95% CI: 0.6343-0.9436 (DeLong)
xgbpreds<- predict(xgb_tune_final_short, validation.short, type = "prob")
xgb.pred<- prediction(xgbpreds$PH, as.factor(setBshort$group))
xgb.perfs<- ROCR::performance(xgb.pred,"tpr","fpr")
xgb.sens<- ROCR::performance(xgb.pred,"sens","spec")
xgb.auc<- pROC::auc(as.factor(setBshort$group), xgbpreds[,2])
## Setting levels: control = HV, case = PH
## Setting direction: controls < cases
xgb.aucCI<- pROC::ci.auc(as.factor(setBshort$group), xgbpreds[,2])
## Setting levels: control = HV, case = PH
## Setting direction: controls < cases
par(mfrow=c(1,2))
plot(xgb.perfs, main = paste("AUC:", round(xgb.auc,2)))
plot(xgb.sens)
xgb.aucCI
## 95% CI: 0.6576-0.9852 (DeLong)
plot(xgb.perfs, col = "blue", lwd = 2.5, cex = 2)
plot(LASSO.perfs.min, col = "light blue", add = TRUE, lwd = 2.5)
plot(rpart.perfs, col = "dark blue", add = TRUE, lwd = 2.5)
plot(Boruta.perfs, col = "turquoise", add = TRUE, lwd = 2.5)
abline(0,1, lty= 2, lwd = 2)
legend("bottomright", legend = c("XGBoost", "LASSO", "Rpart", "Random Forest"), col = c("blue", "light blue", "dark blue", "turquoise"), lty = 1, inset = 0.1, lwd = 2.5)
kable(multi.boruta.mirs, col.names = paste("Boruta miRs: (",length(multi.boruta.mirs),")"))
Boruta miRs: ( 10 ) |
---|
let.7d.3p |
miR.187.5p |
miR.18b.5p |
miR.3613.3p |
miR.4707.5p |
miR.572 |
miR.636 |
miR.652.3p |
miR.671.5p |
miR.933 |
kable(as.character(rpartmirs), col.names = paste("Rpart miRs: (",length(rpartmirs),")"))
Rpart miRs: ( 4 ) |
---|
miR.187.5p |
miR.636 |
miR.151a.3p |
miR.1246 |
kable(lasso.model$rowname[-1], col.names = paste("LASSO miRs: (", length(lasso.model$rowname[-1]),")"))
LASSO miRs: ( 13 ) |
---|
let.7d.3p |
miR.1306.3p |
miR.148a.3p |
miR.187.5p |
miR.34a.5p |
miR.451a |
miR.4707.5p |
miR.484 |
miR.494 |
miR.548am.5p |
miR.572 |
miR.636 |
miR.671.5p |
kable(xgb_mirs$Feature, col.names = paste("XGBoost miRs: (",length(xgb_mirs$Feature),")"))
XGBoost miRs: ( 8 ) |
---|
miR.636 |
miR.187.5p |
let.7d.3p |
miR.3613.3p |
miR.4707.5p |
miR.18b.5p |
miR.572 |
miR.125a.5p |
paste("Unique miRs:", length(unique(c(multi.boruta.mirs, rpartmirs, lasso.model$rowname[-1], xgb_mirs$Feature))))
## [1] "Unique miRs: 20"
library(VennDiagram)
v1<- venn.diagram(x=list(A= as.vector(lasso.model$rowname),B=as.vector(rpartmirs),C=as.vector(multi.boruta.mirs), D=as.vector(xgb_mirs$Feature)), category = c("LASSO miRs","Rpart miRs","Random forest miRs","XGBoost miRs"), fill = c("skyblue","pink1","mediumorchid","dark blue"), lty = "blank", fontfamily = "sans", cat.fontfamily = "sans", filename=NULL, simplify=TRUE, cex = 1.3, cat.cex = 1.3)
grid.newpage()
grid.draw(v1)
totalprobs<- data.frame(patientID = rownames(Boruta.perf), group = setB$group, RandomForest = Boruta.perf$PH, rpart = rpart.perf$PH, LASSO = LASSO.probs.min$PH, XGB = xgbpreds$PH)
totalprobs <- mutate(totalprobs, Mean = rowMeans(totalprobs[3:6]))
totalprobs$EnsemblePreds <- ifelse(totalprobs$Mean > 0.5, "PH", "HV")
Ensemble.pred<-prediction(totalprobs$Mean, totalprobs$group)
Ensemble.perfs<- ROCR::performance(Ensemble.pred, "tpr", "fpr")
kable(totalprobs) %>% kable_styling(full_width = TRUE)
patientID | group | RandomForest | rpart | LASSO | XGB | Mean | EnsemblePreds |
---|---|---|---|---|---|---|---|
773_1_3.txt | PH | 0.861 | 0.8666667 | 0.8610238 | 0.9266345 | 0.8788313 | PH |
988_2_1.txt | HV | 0.625 | 0.8666667 | 0.6270985 | 0.8060075 | 0.7311932 | PH |
987_2_2.txt | PH | 0.867 | 0.9230769 | 0.9681849 | 0.7909521 | 0.8873035 | PH |
988_2_2.txt | HV | 0.642 | 0.8666667 | 0.7828652 | 0.8272272 | 0.7796898 | PH |
988_2_3.txt | PH | 0.469 | 0.8666667 | 0.6646667 | 0.4601494 | 0.6151207 | PH |
991_1_1.txt | PH | 0.892 | 0.9230769 | 0.9760982 | 0.9143730 | 0.9263870 | PH |
994_2_4.txt | HV | 0.112 | 0.0454545 | 0.4433199 | 0.1719547 | 0.1931823 | HV |
756_1_4.txt | PH | 0.827 | 0.9230769 | 0.9729316 | 0.8753925 | 0.8996003 | PH |
780_2_1.txt | HV | 0.588 | 0.9230769 | 0.5069344 | 0.3914426 | 0.6023635 | PH |
756_1_1.txt | PH | 0.805 | 0.9230769 | 0.8110336 | 0.8562466 | 0.8488393 | PH |
990_1_4.txt | PH | 0.908 | 0.9230769 | 0.8860309 | 0.9222433 | 0.9098378 | PH |
990_1_3.txt | PH | 0.671 | 0.9230769 | 0.4652240 | 0.7577598 | 0.7042652 | PH |
759_2_4.txt | PH | 0.918 | 0.9230769 | 0.7153382 | 0.7805382 | 0.8342383 | PH |
760_1_1.txt | PH | 0.888 | 0.9230769 | 0.9594336 | 0.7051297 | 0.8689100 | PH |
990_1_1.txt | PH | 0.799 | 0.8666667 | 0.7554185 | 0.8007538 | 0.8054598 | PH |
756_2_2.txt | PH | 0.621 | 0.8666667 | 0.7759468 | 0.6305977 | 0.7235528 | PH |
991_2_3.txt | PH | 0.858 | 0.9230769 | 0.8211853 | 0.8018932 | 0.8510389 | PH |
989_2_2.txt | PH | 0.713 | 0.8666667 | 0.7855566 | 0.7922395 | 0.7893657 | PH |
760_2_1.txt | PH | 0.710 | 0.0454545 | 0.6660852 | 0.7215491 | 0.5357722 | PH |
770_1_2.txt | PH | 0.877 | 0.9230769 | 0.8527485 | 0.9214918 | 0.8935793 | PH |
779_1_2.txt | PH | 0.713 | 0.9230769 | 0.5425231 | 0.7151947 | 0.7234487 | PH |
779_1_4.txt | PH | 0.631 | 0.9230769 | 0.2968497 | 0.5210212 | 0.5929869 | PH |
986_1_4.txt | PH | 0.540 | 0.0454545 | 0.3144224 | 0.5690590 | 0.3672340 | HV |
994_1_3.txt | HV | 0.240 | 0.0454545 | 0.0996630 | 0.2303311 | 0.1538622 | HV |
994_1_4.txt | HV | 0.251 | 0.0454545 | 0.1631164 | 0.2609853 | 0.1801391 | HV |
980_1_1.txt | HV | 0.327 | 0.0454545 | 0.2625634 | 0.2860165 | 0.2302586 | HV |
980_1_2.txt | HV | 0.251 | 0.0454545 | 0.3797412 | 0.4075463 | 0.2709355 | HV |
982_1_3.txt | HV | 0.489 | 0.0454545 | 0.4489525 | 0.4375283 | 0.3552338 | HV |
758_1_2.txt | HV | 0.298 | 0.0454545 | 0.3030106 | 0.2726333 | 0.2297746 | HV |
982_1_1.txt | HV | 0.420 | 0.0454545 | 0.5840516 | 0.4051780 | 0.3636710 | HV |
780_1_1.txt | HV | 0.396 | 0.9230769 | 0.4244571 | 0.4439179 | 0.5468630 | PH |
773_2_1.txt | HV | 0.942 | 0.9230769 | 0.9280066 | 0.8514320 | 0.9111289 | PH |
767_2_3.txt | HV | 0.492 | 0.0454545 | 0.2876439 | 0.5135296 | 0.3346570 | HV |
770_2_2.txt | PH | 0.893 | 0.9230769 | 0.9282741 | 0.8327190 | 0.8942675 | PH |
767_2_4.txt | PH | 0.255 | 0.8000000 | 0.2361473 | 0.2724249 | 0.3908930 | HV |
774_1_3.txt | PH | 0.401 | 0.9230769 | 0.3690218 | 0.5977038 | 0.5727006 | PH |
pROC::auc(totalprobs$group, totalprobs$Mean)
## Setting levels: control = HV, case = PH
## Setting direction: controls < cases
## Area under the curve: 0.8474
pROC::ci.auc(totalprobs$group, totalprobs$Mean)
## Setting levels: control = HV, case = PH
## Setting direction: controls < cases
## 95% CI: 0.695-0.9998 (DeLong)
table(totalprobs$EnsemblePreds, totalprobs$group)
##
## HV PH
## HV 9 2
## PH 5 20
RFvars<-varImp(fit.Boruta)[[1]]%>% rownames_to_column() %>% select(miR = rowname, RFIMP = Overall)
rpartvars<- varImp(fit.caret.rpart)[[1]] %>% filter(Overall > 0) %>% rownames_to_column() %>% select(miR = rowname, rpartIMP = Overall)
LASSOvars<- varImp(LASSO.min)[[1]] %>% filter(Overall > 0)%>% rownames_to_column() %>% select(miR = rowname, LASSOIMP = Overall)
XGBvars<- varImp(xgb_tune_final_short)[[1]]%>% rownames_to_column() %>% select(miR = rowname, XGBIMP = Overall)
n<- max(dim(RFvars)[1], dim(rpartvars)[1], dim(LASSOvars)[1], dim(XGBvars)[1])
allimps <- dplyr::full_join(x = RFvars, y= rpartvars) %>% full_join(x=., y=LASSOvars) %>% full_join(x=., XGBvars)
## Joining, by = "miR"
## Joining, by = "miR"
## Joining, by = "miR"
plot(varImp(fit.Boruta))
plot(varImp(fit.caret.rpart))
plot(varImp(LASSO.min))
plot(varImp(xgb_tune_final_short))
reshaped<- melt(allimps, id.vars = 1)
ggplot(reshaped, aes(x=miR, y = value)) + geom_bar(aes(fill=variable),stat="identity",position="dodge") + theme(axis.text.x=element_text(angle=90, vjust=0.5, size = 12), panel.background = element_blank(), panel.grid = element_line(colour="grey")) + labs(x="", y="Importance", size=12) + scale_fill_brewer(palette = "RdYlBu")
## Warning: Removed 45 rows containing missing values (geom_bar).
ggplot(reshaped, aes(x=miR, y = value)) + geom_bar(aes(fill=variable),stat="identity") + theme(axis.text.x=element_text(angle=90, vjust=0.5, size = 12), panel.background = element_blank(), panel.grid = element_line(colour="grey")) + labs(x="", y="Importance", size=12) + scale_fill_manual(name = "", labels = c("Random Forest", "rpart", "LASSO", "XGBoost"), values = c("dark red", "tomato2", "midnight blue", "cornflowerblue"))
## Warning: Removed 45 rows containing missing values (position_stack).
The figure includes both the training and validation sets.
#boxplots of miRs selected by at least 2 feature selection methods
mormirs <- c("group",mirnames)
#reduce dataset to contain only mirs in mirnames
moremirs<- dataset[,mormirs]
#Mean centre data:
centre_colmeans <- function(x) {
xcentre = colMeans(x)
#subtract each column's mean from that column (column means repeated nrow times each)
x - rep(xcentre, rep.int(nrow(x), ncol(x)))
}
meanc<- centre_colmeans(moremirs[-1])
meanc$group <- moremirs$group
meanc.melt<- melt(meanc, id.var = "group")
ggplot(data = meanc.melt, aes(x= variable, y=value)) + geom_boxplot(aes(fill=group)) + theme(axis.text.x=element_text(angle=90, vjust=0.5, size = 12),axis.text.y=element_text(size = 12),axis.title.y=element_text(size=12), panel.background = element_rect(fill = 'white'), axis.line = element_line(colour = 'black')) + labs(x="", y="Mean centered expression") + scale_colour_manual(values = c("Dark Blue", "Light Blue")) + scale_fill_manual(values = c("Midnight blue", "cornflowerblue"))
mirheat<- data.frame(Boruta = all.mirs, Rpart = all.mirs, LASSO = all.mirs, XGBoost = all.mirs, stringsAsFactors = FALSE)
rownames(mirheat)<- all.mirs
mirheat$Boruta <- ifelse(mirheat$Boruta %in% multi.boruta.mirs, "miR selected", "miR not selected")
mirheat$Rpart<- ifelse(mirheat$Rpart %in% as.character(rpartmirs),"miR selected", "miR not selected")
mirheat$LASSO <- ifelse(mirheat$LASSO %in% lasso.model$rowname[-1], "miR selected", "miR not selected")
mirheat$XGBoost <- ifelse(mirheat$XGBoost %in% xgb_mirs$Feature,"miR selected", "miR not selected")
library(pheatmap)
library(RColorBrewer) #brewer.pal is used below
colors <- colorRampPalette(brewer.pal(9, "Blues"))(255)
my_colour = list(
XGBoost = c(`miR selected` = "midnightblue", `miR not selected`= "cornsilk1"),
LASSO = c(`miR selected` = "midnightblue", `miR not selected`= "cornsilk1"),
Rpart = c(`miR selected` = "midnightblue", `miR not selected`= "cornsilk1"),
Boruta = c(`miR selected` = "midnightblue", `miR not selected`= "cornsilk1")
)
forheatmap<- dataset[,colnames(dataset) %in% all.mirs]
sampleDists<- cor(forheatmap, method = "spearman")
sampleDists<- abs(sampleDists)
sampleDistsM<- as.matrix(sampleDists)
pheatmap(sampleDistsM, annotation_row = mirheat, col = colors, annotation_colors = my_colour, clustering_method = "average")
Classification of patients based on NT-proBNP:
- Normal <125 pg/ml in under-75s
- Normal <450 pg/ml in over-75s
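A minimal sketch of how that age-dependent rule could be coded (illustration only; the age and NTproBNP column names are assumptions, and the analysis below uses the Predicted column already present in the spreadsheet):
#hypothetical helper: call PH/HV from NT-proBNP using the age-dependent normal range
bnp_call <- function(NTproBNP, age){
upper_normal <- ifelse(age < 75, 125, 450) #pg/ml
ifelse(NTproBNP > upper_normal, "PH", "HV")
}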
library(readxl)
NTproBNP <- as.data.frame(read_excel("~/Google Drive File Stream/My Drive/miRNA/NTproBNP.xlsx"))
rownames(NTproBNP)<- NTproBNP$filename
table(NTproBNP[,6:7])
## Predicted
## Status HV PH
## HV 21 4
## PH 2 42
dataset$filename <- rownames(dataset)
withBNP<- dplyr::left_join(dataset, NTproBNP, by = "filename")
rownames(withBNP)<- withBNP$filename
withBNP<- dplyr::select(withBNP, -biobankid, -uid, -filename, -Status, -Predicted, -PHstatus, -Normal.range)
withBNPa<- withBNP[withBNP$AB == "A",] %>% select(-AB) %>% filter(NTproBNP > 0)
withBNPb<- withBNP[withBNP$AB == "B",]%>% select(-AB) %>% filter(NTproBNP > 0)
withBNPa$group <- as.factor(withBNPa$group)
proBNP<- glm(group ~ NTproBNP, data = withBNPa, family = "binomial")
summary(proBNP)
##
## Call:
## glm(formula = group ~ NTproBNP, family = "binomial", data = withBNPa)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8404 -0.6935 0.0031 0.3460 1.7358
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.489534 0.605324 -2.461 0.0139 *
## NTproBNP 0.004907 0.001966 2.496 0.0126 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 58.704 on 43 degrees of freedom
## Residual deviance: 31.976 on 42 degrees of freedom
## AIC: 35.976
##
## Number of Fisher Scoring iterations: 8
withBNPb$group<- as.factor(withBNPb$group)
NTproBNPprob<- predict(proBNP, newdata = withBNPb, type = "response")
NTproBNPpred<- rep("HV", dim(withBNPb)[1])
NTproBNPpred[NTproBNPprob > 0.5] = "PH"
NTproBNPpred<- as.factor(NTproBNPpred)
confusionMatrix(NTproBNPpred,withBNPb$group, positive = "PH")
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 8 4
## PH 0 12
##
## Accuracy : 0.8333
## 95% CI : (0.6262, 0.9526)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : 0.05935
##
## Kappa : 0.6667
##
## Mcnemar's Test P-Value : 0.13361
##
## Sensitivity : 0.7500
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 0.6667
## Prevalence : 0.6667
## Detection Rate : 0.5000
## Detection Prevalence : 0.5000
## Balanced Accuracy : 0.8750
##
## 'Positive' Class : PH
##
pred.BNP<- prediction(NTproBNPprob, as.factor(withBNPb$group), label.ordering = c("HV","PH"))
perfs.BNP<- ROCR::performance(pred.BNP,"tpr","fpr")
sens.BNP<- ROCR::performance(pred.BNP,"sens","spec")
auc.BNP<- pROC::auc(as.factor(withBNPb$group), NTproBNPprob)
aucCI.BNP<- pROC::ci.auc(as.factor(withBNPb$group), NTproBNPprob)
aucCI.BNP
## 95% CI: 0.8404-1 (DeLong)
par(mfrow=c(1,2))
plot(perfs.BNP, main = paste("AUC:", round(auc.BNP,2)))
plot(sens.BNP)
Add NTproBNP to the existing models, as a continuous variable:
m2<- paste(m, "+", "NTproBNP")
RFm2<- as.formula(paste("group ~ ",m2,sep = ""))
Boruta.data2<- withBNPa[,c("group",multi.boruta.mirs, "NTproBNP")]
tree.Boruta2<- withBNPb[,c("group", multi.boruta.mirs, "NTproBNP")]
tunegrid <- expand.grid(.mtry=1)
control<- trainControl(method = 'repeatedcv',
number = 10,
repeats = 10,
savePredictions = TRUE,
classProbs = TRUE)
fit.Boruta2 <- caret::train(RFm2, data=Boruta.data2, method="rf", metric="Accuracy", tuneGrid=tunegrid, trControl=control, ntree=ntree)
RFBoruta.train2 <- predict(fit.Boruta2, tree.Boruta2)
confusionMatrix(as.factor(RFBoruta.train2), tree.Boruta2$group, positive = "PH")
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 6 0
## PH 2 16
##
## Accuracy : 0.9167
## 95% CI : (0.73, 0.9897)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : 0.004871
##
## Kappa : 0.8
##
## Mcnemar's Test P-Value : 0.479500
##
## Sensitivity : 1.0000
## Specificity : 0.7500
## Pos Pred Value : 0.8889
## Neg Pred Value : 1.0000
## Prevalence : 0.6667
## Detection Rate : 0.6667
## Detection Prevalence : 0.7500
## Balanced Accuracy : 0.8750
##
## 'Positive' Class : PH
##
rpartBNPdata<- withBNPa[,colnames(withBNPa) %in% c("group", rpartmirs, "NTproBNP")]
tc <- trainControl(method="repeatedcv", number=10, repeats = 10, classProbs=TRUE, summaryFunction = twoClassSummary, savePredictions = TRUE)
set.seed(100)
fit.caret.rpart2 <- caret::train(group ~ ., data=rpartBNPdata, method='rpart', metric="ROC", trControl=tc, control=rpart.control(minsplit=2, minbucket=3, surrogatestyle = 1, maxcompete = 0))
fit.caret.rpart2$bestTune
## cp
## 2 0.4117647
prp(fit.caret.rpart2$finalModel, main="PH from HV Rpart model", extra=2, varlen=0)
plot(as.party(fit.caret.rpart2$finalModel), main="PH from HV Rpart model", drop_terminal=F)
predrpart2 <- predict(fit.caret.rpart2, withBNPb)
predrpart2<- as.character(predrpart2)
confusionMatrix(as.factor(predrpart2), withBNPb$group, positive = "PH")
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 6 2
## PH 2 14
##
## Accuracy : 0.8333
## 95% CI : (0.6262, 0.9526)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : 0.05935
##
## Kappa : 0.625
##
## Mcnemar's Test P-Value : 1.00000
##
## Sensitivity : 0.8750
## Specificity : 0.7500
## Pos Pred Value : 0.8750
## Neg Pred Value : 0.7500
## Prevalence : 0.6667
## Detection Rate : 0.5833
## Detection Prevalence : 0.6667
## Balanced Accuracy : 0.8125
##
## 'Positive' Class : PH
##
newLASSO <- withBNPa[, colnames(withBNPa) %in% c(colnames(ml.Spear.LASSO), "NTproBNP")]
tuneLASSO<- expand.grid(.alpha = 1, .lambda = fit.glmcv$lambda.min)
LASSO.min2<- caret::train(group ~ ., data = newLASSO, method = "glmnet", trControl = control, family = 'binomial', tuneGrid = tuneLASSO)
lasso.model2<- coef(LASSO.min2$finalModel, LASSO.min2$bestTune$lambda) %>% as.matrix() %>% as.data.frame() %>% rownames_to_column %>% filter(abs(`1`) >0)
val.LASSO2<- withBNPb[, colnames(withBNPb) %in% colnames(newLASSO)]
LASSO.pred.min2 <- predict(LASSO.min2, newdata=val.LASSO2[-1])
predicted.classes<-as.character(LASSO.pred.min2)
confusionMatrix(LASSO.pred.min2,val.LASSO2$group, positive = "PH")
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 5 3
## PH 3 13
##
## Accuracy : 0.75
## 95% CI : (0.5329, 0.9023)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : 0.2632
##
## Kappa : 0.4375
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.8125
## Specificity : 0.6250
## Pos Pred Value : 0.8125
## Neg Pred Value : 0.6250
## Prevalence : 0.6667
## Detection Rate : 0.5417
## Detection Prevalence : 0.6667
## Balanced Accuracy : 0.7188
##
## 'Positive' Class : PH
##
train2<- withBNPa[, colnames(withBNPa) %in% c(colnames(train.short), "NTproBNP")]
train2<- data.matrix(train2)
validation2<- withBNPb[, colnames(withBNPb) %in% c(colnames(train.short), "NTproBNP")]
validation2<- data.matrix(validation2)
labels<- withBNPa$group
labelsB <- withBNPb$group
set.seed(25)
xgb_tune_final_short2 <- caret::train(
x = train2,
y = as.factor(labels),
trControl = tune_control,
tuneGrid = final_grid_short,
method = "xgbTree",
verbose = TRUE
)
xgbpredictfinalshort2<- predict(xgb_tune_final_short2, validation2)
confusionMatrix(xgbpredictfinalshort2, as.factor(labelsB), positive = "PH")
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 6 1
## PH 2 15
##
## Accuracy : 0.875
## 95% CI : (0.6764, 0.9734)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : 0.0199
##
## Kappa : 0.7097
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.9375
## Specificity : 0.7500
## Pos Pred Value : 0.8824
## Neg Pred Value : 0.8571
## Prevalence : 0.6667
## Detection Rate : 0.6250
## Detection Prevalence : 0.7083
## Balanced Accuracy : 0.8438
##
## 'Positive' Class : PH
##
Boruta.perf2<- predict(fit.Boruta2, tree.Boruta2[-1], type= "prob")
Boruta.pred2<- prediction(Boruta.perf2$PH, tree.Boruta2$group)
Boruta.perfs2<- ROCR::performance(Boruta.pred2,"tpr","fpr")
Boruta.sens2<- ROCR::performance(Boruta.pred2,"sens","spec")
Boruta.auc2<- pROC::auc(tree.Boruta2$group, Boruta.perf2[,2])
Boruta.aucCI2<- pROC::ci.auc(tree.Boruta2$group, Boruta.perf2[,2])
par(mfrow=c(1,2))
plot(Boruta.perfs2, main = paste("AUC:", round(Boruta.auc2,2)))
plot(Boruta.sens2)
Boruta.aucCI2
## 95% CI: 0.9211-1 (DeLong)
rpart.perf2<- predict(fit.caret.rpart2, withBNPb[-1], type= "prob")
rpart.pred2<- prediction(rpart.perf2$PH, withBNPb$group)
rpart.perfs2<- ROCR::performance(rpart.pred2,"tpr","fpr")
rpart.sens2<- ROCR::performance(rpart.pred2,"sens","spec")
rpart.auc2<- pROC::auc(withBNPb$group, rpart.perf2[,2])
rpart.aucCI2<- pROC::ci.auc(withBNPb$group, rpart.perf2[,2])
par(mfrow=c(1,2))
plot(rpart.perfs2, main = paste("AUC:", round(rpart.auc2,2)))
plot(rpart.sens2)
rpart.aucCI2
## 95% CI: 0.6316-0.9934 (DeLong)
LASSO.probs.min2 <- predict(LASSO.min2, newdata=withBNPb[-1], type = "prob")
LASSO.pred.min2<- prediction(LASSO.probs.min2$PH, withBNPb$group)
LASSO.perfs.min2<- ROCR::performance(LASSO.pred.min2,"tpr","fpr")
LASSO.sens.min2<- ROCR::performance(LASSO.pred.min2,"sens","spec")
LASSO.auc.min2<- pROC::auc(withBNPb$group, LASSO.probs.min2[,2])
LASSO.aucCI2<- pROC::ci.auc(withBNPb$group, LASSO.probs.min2[,2])
par(mfrow=c(1,2))
plot(LASSO.perfs.min2, main = paste("AUC:", round(LASSO.auc.min2,2)))
plot(LASSO.sens.min2)
LASSO.aucCI2
## 95% CI: 0.8296-1 (DeLong)
xgbpreds2<- predict(xgb_tune_final_short2, validation2, type = "prob")
xgb.pred2<- prediction(xgbpreds2$PH, as.factor(labelsB))
xgb.perfs2<- ROCR::performance(xgb.pred2,"tpr","fpr")
xgb.sens2<- ROCR::performance(xgb.pred2,"sens","spec")
xgb.auc2<- pROC::auc(as.factor(labelsB), xgbpreds2[,2])
xgb.aucCI2<- pROC::ci.auc(as.factor(labelsB), xgbpreds2[,2])
par(mfrow=c(1,2))
plot(xgb.perfs2, main = paste("AUC:", round(xgb.auc2,2)))
plot(xgb.sens2)
totalprobs2<- data.frame(patientID = rownames(Boruta.perf2), group = tree.Boruta2$group, RandomForest = Boruta.perf2$PH, rpart = rpart.perf2$PH, LASSO = LASSO.probs.min2$PH, XGB = xgbpreds2$PH)
totalprobs2 <- mutate(totalprobs2, Mean = rowMeans(totalprobs2[3:6]))
totalprobs2$EnsemblePreds2 <- ifelse(totalprobs2$Mean > 0.5, "PH", "HV")
Ensemble.pred2<-prediction(totalprobs2$Mean, totalprobs2$group)
Ensemble.perfs2<- ROCR::performance(Ensemble.pred2, "tpr", "fpr")
kable(totalprobs2) %>% kable_styling(full_width = TRUE)
patientID | group | RandomForest | rpart | LASSO | XGB | Mean | EnsemblePreds2 |
---|---|---|---|---|---|---|---|
988_2_1.txt | HV | 0.741 | 0.9285714 | 0.5754949 | 0.8038322 | 0.7622246 | PH |
987_2_2.txt | PH | 0.856 | 0.9285714 | 0.9999971 | 0.9065028 | 0.9227678 | PH |
988_2_2.txt | HV | 0.532 | 0.0625000 | 0.5965555 | 0.5060492 | 0.4242762 | HV |
991_1_1.txt | PH | 0.908 | 0.9285714 | 0.9998723 | 0.9368197 | 0.9433159 | PH |
756_1_4.txt | PH | 0.913 | 0.9285714 | 0.9636261 | 0.9433010 | 0.9371246 | PH |
756_1_1.txt | PH | 0.892 | 0.9285714 | 0.9472897 | 0.8939501 | 0.9154528 | PH |
990_1_4.txt | PH | 0.950 | 0.9285714 | 0.8271773 | 0.9509395 | 0.9141721 | PH |
990_1_3.txt | PH | 0.744 | 0.9285714 | 0.4253599 | 0.7725604 | 0.7176229 | PH |
759_2_4.txt | PH | 0.941 | 0.9285714 | 0.9949804 | 0.8888467 | 0.9383496 | PH |
760_1_1.txt | PH | 0.917 | 0.9285714 | 0.9741941 | 0.8625593 | 0.9205812 | PH |
990_1_1.txt | PH | 0.848 | 0.9285714 | 0.9999998 | 0.8455595 | 0.9055327 | PH |
756_2_2.txt | PH | 0.746 | 0.9285714 | 0.9991012 | 0.8210467 | 0.8736798 | PH |
991_2_3.txt | PH | 0.741 | 0.0625000 | 0.6952485 | 0.5080644 | 0.5017032 | PH |
989_2_2.txt | PH | 0.787 | 0.9285714 | 0.7774324 | 0.9088891 | 0.8504732 | PH |
760_2_1.txt | PH | 0.694 | 0.9285714 | 0.6341162 | 0.7872966 | 0.7609961 | PH |
770_1_2.txt | PH | 0.899 | 0.9285714 | 0.9441111 | 0.9493203 | 0.9302507 | PH |
779_1_2.txt | PH | 0.767 | 0.9285714 | 0.4301157 | 0.7762326 | 0.7254799 | PH |
986_1_4.txt | PH | 0.529 | 0.0625000 | 0.4565787 | 0.4813725 | 0.3823628 | HV |
994_1_4.txt | HV | 0.246 | 0.0625000 | 0.1335094 | 0.1271603 | 0.1422924 | HV |
980_1_1.txt | HV | 0.256 | 0.0625000 | 0.2953754 | 0.2057881 | 0.2049159 | HV |
980_1_2.txt | HV | 0.142 | 0.0625000 | 0.2926489 | 0.2262291 | 0.1808445 | HV |
982_1_3.txt | HV | 0.313 | 0.0625000 | 0.2995181 | 0.2219070 | 0.2242313 | HV |
758_1_2.txt | HV | 0.219 | 0.0625000 | 0.2667769 | 0.1904820 | 0.1846897 | HV |
982_1_1.txt | HV | 0.371 | 0.9285714 | 0.5217201 | 0.4548452 | 0.5690342 | PH |
pROC::auc(totalprobs2$group, totalprobs2$Mean)
## Area under the curve: 0.9375
pROC::ci.auc(totalprobs2$group, totalprobs2$Mean)
## 95% CI: 0.8432-1 (DeLong)
table(totalprobs2$EnsemblePreds2, totalprobs2$group)
##
## HV PH
## HV 6 1
## PH 2 15
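The 2x2 table above gives the raw counts for the ensemble calls. For the same summary statistics reported for the individual models, the calls could also be passed through confusionMatrix(); a minimal sketch, assuming the group column keeps the HV/PH coding used throughout:
# Sketch: full caret summary for the ensemble predictions, PH as the positive class
confusionMatrix(factor(totalprobs2$EnsemblePreds2, levels = c("HV", "PH")),
                factor(totalprobs2$group, levels = c("HV", "PH")),
                positive = "PH")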
Dashed lines show the models that include NTproBNP (n = 24); solid lines show the base models (n = 32). The final panel shows NTproBNP alone.
layout(mat = matrix(c(1,1,2,2,3,3,4,4,5,5,6,6,7,7,7,7), nrow = 2, byrow = TRUE))
#layout.show(n = 5)
plot(xgb.perfs, col = "dark blue", lty = 1, main = "XGBoost", cex.lab = 1.3, cex.main = 1.5)
plot(xgb.perfs2, col = "dark blue", lty = 2, add = TRUE, cex.lab = 2, cex.main = 1.5)
plot(LASSO.perfs.min, col = "dark blue", lty = 1, main = "LASSO", cex.lab = 1.3, cex.main = 1.5)
plot(LASSO.perfs.min2, col = "dark blue", lty = 2, add = TRUE, cex.lab = 1.3, cex.main = 1.5)
plot(rpart.perfs, col = "dark blue", lty = 1, main = "Rpart", cex.lab = 1.3, cex.main = 1.5)
plot(rpart.perfs2, col = "dark blue", lty = 2, add = TRUE, cex.lab = 1.3, cex.main = 1.5)
plot(Boruta.perfs, col = "dark blue", lty = 1, main = "Random Forest", cex.lab = 1.3, cex.main = 1.5)
plot(Boruta.perfs2, col = "dark blue", lty = 2, add = TRUE, cex.lab = 1.3, cex.main = 1.5)
plot(Ensemble.perfs, col = "dark blue", lty = 1, main = "Ensemble", cex.lab = 1.3, cex.main = 1.5)
plot(Ensemble.perfs2, col = "dark blue", lty = 2, add = TRUE, cex.lab = 1.3, cex.main = 1.5)
plot(perfs.BNP, main = "NTproBNP", cex.lab = 1.3, cex.main = 1.5, col = "dark blue", lty = 2)
plot(NULL ,xaxt='n',yaxt='n',bty='n',ylab='',xlab='', xlim=0:1, ylim=0:1)
legend("center", legend = c("base model (n = 32)", "with NTproBNP (n = 24)"), lty = c(1,2), cex = 1.3, bty = "n")
layout(mat = matrix(c(1,1,2,2,3,3,4,4,5,5,6,6), nrow = 2, byrow = TRUE))
#layout.show(n = 5)
plot(xgb.perfs, col = "dark blue", lty = 1, main = "XGBoost", cex.lab = 1.3, cex.main = 1.5, lwd = 3)
plot(LASSO.perfs.min, col = "dark blue", lty = 1, main = "LASSO", cex.lab = 1.3, cex.main = 1.5, lwd = 3)
plot(rpart.perfs, col = "dark blue", lty = 1, main = "Rpart", cex.lab = 1.3, cex.main = 1.5, lwd = 3)
plot(Boruta.perfs, col = "dark blue", lty = 1, main = "Random Forest", cex.lab = 1.3, cex.main = 1.5, lwd = 3)
plot(Ensemble.perfs, col = "dark blue", lty = 1, main = "Ensemble", cex.lab = 1.3, cex.main = 1.5, lwd = 3)
plot(perfs.BNP, main = "NTproBNP", cex.lab = 1.3, cex.main = 1.5, col = "dark blue", lty = 1, lwd = 3)
# Build a confusionMatrix (PH as the positive class) for one resample of a caret split
separatesplittoconfusion<- function(newlist, splits, d) {
newlist[[d]]<- confusionMatrix(splits[[d]]$pred, splits[[d]]$obs, positive = "PH")
}
# Helpers to pull single metrics out of a confusionMatrix object.
# caret tabulates Prediction in rows and Reference in columns (levels HV, PH), so
# indexing the raw table by position is easy to get wrong; $overall and $byClass
# already hold the metrics for the declared positive class ("PH").
confusionaccuracy<- function(x){
unname(x$overall["Accuracy"])
}
confusionsensitivity<- function(x){
unname(x$byClass["Sensitivity"])
}
confusionspecificity<- function(x){
unname(x$byClass["Specificity"])
}
confusionPPV<- function(x){
unname(x$byClass["Pos Pred Value"])
}
confusionNPV<- function(x){
unname(x$byClass["Neg Pred Value"])
}
borutaconfusions<- list()
borutaconfusions<- lapply(1:100, separatesplittoconfusion, newlist = borutaconfusions, splits = boruta.split)
cvborutaaccuracy<- unlist(lapply(borutaconfusions, confusionaccuracy))
cvborutasensitivity<- unlist(lapply(borutaconfusions, confusionsensitivity))
cvborutaspecificity<- unlist(lapply(borutaconfusions, confusionspecificity))
cvborutaPPV <- unlist(lapply(borutaconfusions, confusionPPV))
cvborutaNPV<- unlist(lapply(borutaconfusions, confusionNPV))
summary(cvborutaaccuracy)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4286 0.8125 0.8571 0.8461 0.8750 1.0000
summary(cvborutasensitivity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.6667 0.6667 0.7250 1.0000 1.0000
summary(cvborutaspecificity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.5000 0.8000 1.0000 0.9305 1.0000 1.0000
summary(cvborutaPPV)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.3333 0.7500 1.0000 0.9056 1.0000 1.0000 2
summary(cvborutaNPV)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.5000 0.8000 0.8000 0.8485 1.0000 1.0000
rpartconfusions<- list()
rpartconfusions<- lapply(1:100, separatesplittoconfusion, newlist = rpartconfusions, splits = rpart.split)
cvrpartaccuracy<- unlist(lapply(rpartconfusions, confusionaccuracy))
cvrpartsensitivity<- unlist(lapply(rpartconfusions, confusionsensitivity))
cvrpartspecificity<- unlist(lapply(rpartconfusions, confusionspecificity))
cvrpartPPV <- unlist(lapply(rpartconfusions, confusionPPV))
cvrpartNPV<- unlist(lapply(rpartconfusions, confusionNPV))
summary(cvrpartaccuracy)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2500 0.4762 0.5714 0.5774 0.6667 0.8571
summary(cvrpartsensitivity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.3333 0.5000 0.4939 0.6667 1.0000
summary(cvrpartspecificity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2000 0.4917 0.6667 0.6358 0.8083 1.0000
summary(cvrpartPPV)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.4000 0.5000 0.5115 0.6250 1.0000 3
summary(cvrpartNPV)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3333 0.5714 0.6667 0.6474 0.7143 1.0000
LASSOconfusions<- list()
LASSOconfusions<- lapply(1:100, separatesplittoconfusion, newlist = LASSOconfusions, splits = LASSO.split.min)
cvLASSOaccuracy<- unlist(lapply(LASSOconfusions, confusionaccuracy))
cvLASSOsensitivity<- unlist(lapply(LASSOconfusions, confusionsensitivity))
cvLASSOspecificity<- unlist(lapply(LASSOconfusions, confusionspecificity))
cvLASSOPPV <- unlist(lapply(LASSOconfusions, confusionPPV))
cvLASSONPV<- unlist(lapply(LASSOconfusions, confusionNPV))
summary(cvLASSOaccuracy)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2500 0.7024 0.7500 0.7596 0.8571 1.0000
summary(cvLASSOsensitivity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.3333 0.6667 0.6433 1.0000 1.0000
summary(cvLASSOspecificity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4000 0.7500 0.8000 0.8375 1.0000 1.0000
summary(cvLASSOPPV)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.6000 0.7500 0.7633 1.0000 1.0000 2
summary(cvLASSONPV)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4000 0.6667 0.8000 0.7951 1.0000 1.0000
XGBconfusions<- list()
XGBconfusions<- lapply(1:100, separatesplittoconfusion, newlist = XGBconfusions, splits = XGB.split)
cvXGBaccuracy<- unlist(lapply(XGBconfusions, confusionaccuracy))
cvXGBsensitivity<- unlist(lapply(XGBconfusions, confusionsensitivity))
cvXGBspecificity<- unlist(lapply(XGBconfusions, confusionspecificity))
cvXGBPPV <- unlist(lapply(XGBconfusions, confusionPPV))
cvXGBNPV<- unlist(lapply(XGBconfusions, confusionNPV))
summary(cvXGBaccuracy)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4286 0.7143 0.8571 0.8296 1.0000 1.0000
summary(cvXGBsensitivity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.6667 0.6667 0.7533 1.0000 1.0000
summary(cvXGBspecificity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2500 0.7500 1.0000 0.8845 1.0000 1.0000
summary(cvXGBPPV)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.3333 0.6667 1.0000 0.8473 1.0000 1.0000 2
summary(cvXGBNPV)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.5000 0.7500 0.8000 0.8571 1.0000 1.0000
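The per-resample summaries above are printed model by model, which makes side-by-side comparison awkward. A small sketch that collects the mean of each cross-validated metric into a single table (cv_summary is a new name; everything else reuses the vectors computed above, with na.rm = TRUE for the PPV vectors that contain NAs):
# Sketch: mean cross-validated metrics, one row per model
cv_summary <- data.frame(
  model       = c("RandomForest", "rpart", "LASSO", "XGB"),
  accuracy    = c(mean(cvborutaaccuracy), mean(cvrpartaccuracy), mean(cvLASSOaccuracy), mean(cvXGBaccuracy)),
  sensitivity = c(mean(cvborutasensitivity), mean(cvrpartsensitivity), mean(cvLASSOsensitivity), mean(cvXGBsensitivity)),
  specificity = c(mean(cvborutaspecificity), mean(cvrpartspecificity), mean(cvLASSOspecificity), mean(cvXGBspecificity)),
  PPV         = c(mean(cvborutaPPV, na.rm = TRUE), mean(cvrpartPPV, na.rm = TRUE), mean(cvLASSOPPV, na.rm = TRUE), mean(cvXGBPPV, na.rm = TRUE)),
  NPV         = c(mean(cvborutaNPV), mean(cvrpartNPV), mean(cvLASSONPV), mean(cvXGBNPV)))
cv_summary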
roc.out_NTproBNP<- roc(as.factor(withBNPb$group), NTproBNPprob)
roc.out_XGB_NTproBNP<- roc(as.factor(labelsB), xgbpreds2[,2])
roc.out_RF_NTproBNP <- roc(tree.Boruta2$group, Boruta.perf2[,2])
roc.out_rpart_NTproBNP <- roc(withBNPb$group, rpart.perf2[,2])
roc.out_LASSO_NTproBNP <- roc(withBNPb$group, LASSO.probs.min2[,2])
roc.out_rpart <- roc(setB2$group, rpart.perf[,2])
roc.out_LASSO <- roc(val.LASSO$group, LASSO.probs.min[,2])
roc.out_XGB <- roc(as.factor(setBshort$group), xgbpreds[,2])
roc.out_RF <- roc(tree.Boruta$group, Boruta.perf[,2])
roc.test(roc.out_RF, roc.out_RF_NTproBNP, paired = FALSE)
##
## DeLong's test for two ROC curves
##
## data: roc.out_RF and roc.out_RF_NTproBNP
## D = -1.5516, df = 42.469, p-value = 0.1282
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.8441558 0.9726562
roc.test(roc.out_rpart, roc.out_rpart_NTproBNP, paired = FALSE)
##
## DeLong's test for two ROC curves
##
## data: roc.out_rpart and roc.out_rpart_NTproBNP
## D = -0.19268, df = 51.469, p-value = 0.848
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.788961 0.812500
roc.test(roc.out_LASSO, roc.out_LASSO_NTproBNP, paired = FALSE)
##
## DeLong's test for two ROC curves
##
## data: roc.out_LASSO and roc.out_LASSO_NTproBNP
## D = -1.4974, df = 55.603, p-value = 0.1399
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.7889610 0.9296875
roc.test(roc.out_XGB, roc.out_XGB_NTproBNP, paired = FALSE)
##
## DeLong's test for two ROC curves
##
## data: roc.out_XGB and roc.out_XGB_NTproBNP
## D = -1.3993, df = 50.743, p-value = 0.1678
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.8214286 0.9531250
roc.test(roc.out_NTproBNP, roc.out_RF_NTproBNP, paired = FALSE)
## Warning in roc.test.roc(roc.out_NTproBNP, roc.out_RF_NTproBNP, paired = FALSE):
## The ROC curves seem to be paired. Consider performing a paired roc.test.
##
## DeLong's test for two ROC curves
##
## data: roc.out_NTproBNP and roc.out_RF_NTproBNP
## D = -0.62677, df = 34.993, p-value = 0.5349
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.9375000 0.9726562
roc.test(roc.out_NTproBNP, roc.out_rpart_NTproBNP, paired = FALSE)
## Warning in roc.test.roc(roc.out_NTproBNP, roc.out_rpart_NTproBNP, paired =
## FALSE): The ROC curves seem to be paired. Consider performing a paired roc.test.
##
## DeLong's test for two ROC curves
##
## data: roc.out_NTproBNP and roc.out_rpart_NTproBNP
## D = 1.1932, df = 35.241, p-value = 0.2408
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.9375 0.8125
roc.test(roc.out_NTproBNP, roc.out_LASSO_NTproBNP, paired = FALSE)
## Warning in roc.test.roc(roc.out_NTproBNP, roc.out_LASSO_NTproBNP, paired =
## FALSE): The ROC curves seem to be paired. Consider performing a paired roc.test.
##
## DeLong's test for two ROC curves
##
## data: roc.out_NTproBNP and roc.out_LASSO_NTproBNP
## D = 0.10982, df = 45.96, p-value = 0.913
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.9375000 0.9296875
roc.test(roc.out_NTproBNP, roc.out_XGB_NTproBNP, paired = FALSE)
## Warning in roc.test.roc(roc.out_NTproBNP, roc.out_XGB_NTproBNP, paired = FALSE):
## The ROC curves seem to be paired. Consider performing a paired roc.test.
##
## DeLong's test for two ROC curves
##
## data: roc.out_NTproBNP and roc.out_XGB_NTproBNP
## D = -0.23747, df = 45.185, p-value = 0.8134
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.937500 0.953125
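pROC warns above that the curves compared against NTproBNP alone appear to be paired: the NTproBNP-only model and the NTproBNP-augmented models are all evaluated on the same 24 validation samples. The paired form of the DeLong test would look like the sketch below (not run here; it assumes the ROC objects really do share the same response vector):
# Sketch: paired DeLong tests against the NTproBNP-only curve
roc.test(roc.out_NTproBNP, roc.out_RF_NTproBNP, paired = TRUE)
roc.test(roc.out_NTproBNP, roc.out_XGB_NTproBNP, paired = TRUE)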
male<- filter(pheno, Gender == "Male" & AorB == "B") %>% select(filename)
female<- filter(pheno, Gender == "Female" & AorB == "B") %>% select(filename)
Male<- setB[rownames(setB) %in% male[[1]],]
Female<- setB[rownames(setB) %in% female[[1]],]
RFBoruta.trainM <- predict(fit.Boruta, Male)
BorutaInterimM<- confusionMatrix(as.factor(RFBoruta.trainM), as.factor(Male$group), positive = "PH")
BorutaInterimM
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 3 1
## PH 1 7
##
## Accuracy : 0.8333
## 95% CI : (0.5159, 0.9791)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : 0.1811
##
## Kappa : 0.625
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.8750
## Specificity : 0.7500
## Pos Pred Value : 0.8750
## Neg Pred Value : 0.7500
## Prevalence : 0.6667
## Detection Rate : 0.5833
## Detection Prevalence : 0.6667
## Balanced Accuracy : 0.8125
##
## 'Positive' Class : PH
##
RF.perfM<- predict(fit.Boruta, Male[-1], type= "prob")
RF.aucM<- pROC::auc(Male$group, RF.perfM[,2])
RF.aucCIM<- pROC::ci.auc(Male$group, RF.perfM[,2])
RF.aucM
## Area under the curve: 0.9375
RF.aucCIM
## 95% CI: 0.7911-1 (DeLong)
RFBoruta.trainF <- predict(fit.Boruta, Female)
BorutaInterimF<- confusionMatrix(as.factor(RFBoruta.trainF), as.factor(Female$group), positive = "PH")
BorutaInterimF
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 7 2
## PH 3 11
##
## Accuracy : 0.7826
## 95% CI : (0.563, 0.9254)
## No Information Rate : 0.5652
## P-Value [Acc > NIR] : 0.02627
##
## Kappa : 0.5525
##
## Mcnemar's Test P-Value : 1.00000
##
## Sensitivity : 0.8462
## Specificity : 0.7000
## Pos Pred Value : 0.7857
## Neg Pred Value : 0.7778
## Prevalence : 0.5652
## Detection Rate : 0.4783
## Detection Prevalence : 0.6087
## Balanced Accuracy : 0.7731
##
## 'Positive' Class : PH
##
RF.perfF<- predict(fit.Boruta, Female[-1], type= "prob")
RF.aucF<- pROC::auc(Female$group, RF.perfF[,2])
## Setting levels: control = HV, case = PH
## Setting direction: controls < cases
RF.aucCIF<- pROC::ci.auc(Female$group, RF.perfF[,2])
## Setting levels: control = HV, case = PH
## Setting direction: controls < cases
RF.aucF
## Area under the curve: 0.8154
RF.aucCIF
## 95% CI: 0.5994-1 (DeLong)
predrpartM <- predict(fit.caret.rpart, Male)
confusionMatrix(Male$group, predrpartM)
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 2 2
## PH 1 7
##
## Accuracy : 0.75
## 95% CI : (0.4281, 0.9451)
## No Information Rate : 0.75
## P-Value [Acc > NIR] : 0.6488
##
## Kappa : 0.4
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.6667
## Specificity : 0.7778
## Pos Pred Value : 0.5000
## Neg Pred Value : 0.8750
## Prevalence : 0.2500
## Detection Rate : 0.1667
## Detection Prevalence : 0.3333
## Balanced Accuracy : 0.7222
##
## 'Positive' Class : HV
##
rpart.perfM<- predict(fit.caret.rpart, Male[-1], type= "prob")
rpart.aucM<- pROC::auc(Male$group, rpart.perfM[,2])
rpart.aucCIM<- pROC::ci.auc(Male$group, rpart.perfM[,2])
rpart.aucM
## Area under the curve: 0.625
rpart.aucCIM
## 95% CI: 0.2479-1 (DeLong)
predrpartF <- predict(fit.caret.rpart, Female)
confusionMatrix(Female$group, predrpartF, positive = "PH")
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 7 3
## PH 1 12
##
## Accuracy : 0.8261
## 95% CI : (0.6122, 0.9505)
## No Information Rate : 0.6522
## P-Value [Acc > NIR] : 0.05753
##
## Kappa : 0.6378
##
## Mcnemar's Test P-Value : 0.61708
##
## Sensitivity : 0.8000
## Specificity : 0.8750
## Pos Pred Value : 0.9231
## Neg Pred Value : 0.7000
## Prevalence : 0.6522
## Detection Rate : 0.5217
## Detection Prevalence : 0.5652
## Balanced Accuracy : 0.8375
##
## 'Positive' Class : PH
##
rpart.perfF<- predict(fit.caret.rpart, Female[-1], type= "prob")
rpart.aucF<- pROC::auc(Female$group, rpart.perfF[,2])
rpart.aucCIF<- pROC::ci.auc(Female$group, rpart.perfF[,2])
rpart.aucF
## Area under the curve: 0.85
rpart.aucCIF
## 95% CI: 0.6877-1 (DeLong)
LASSO.pred.minM <- predict(LASSO.min, newdata=Male[-1])
LASSOInterimM<- confusionMatrix(LASSO.pred.minM, Male$group, positive = "PH")
LASSOInterimM
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 3 3
## PH 1 5
##
## Accuracy : 0.6667
## 95% CI : (0.3489, 0.9008)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : 0.6315
##
## Kappa : 0.3333
##
## Mcnemar's Test P-Value : 0.6171
##
## Sensitivity : 0.6250
## Specificity : 0.7500
## Pos Pred Value : 0.8333
## Neg Pred Value : 0.5000
## Prevalence : 0.6667
## Detection Rate : 0.4167
## Detection Prevalence : 0.5000
## Balanced Accuracy : 0.6875
##
## 'Positive' Class : PH
##
LASSO.perfM<- predict(LASSO.min, Male[-1], type= "prob")
LASSO.aucM<- pROC::auc(Male$group, LASSO.perfM[,2])
LASSO.aucCIM<- pROC::ci.auc(Male$group, LASSO.perfM[,2])
LASSO.aucM
## Area under the curve: 0.7188
LASSO.aucCIM
## 95% CI: 0.393-1 (DeLong)
LASSO.pred.minF <- predict(LASSO.min, newdata=Female[-1])
LASSOInterimF<- confusionMatrix(LASSO.pred.minF, Female$group, positive = "PH")
LASSOInterimF
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 6 1
## PH 4 12
##
## Accuracy : 0.7826
## 95% CI : (0.563, 0.9254)
## No Information Rate : 0.5652
## P-Value [Acc > NIR] : 0.02627
##
## Kappa : 0.5418
##
## Mcnemar's Test P-Value : 0.37109
##
## Sensitivity : 0.9231
## Specificity : 0.6000
## Pos Pred Value : 0.7500
## Neg Pred Value : 0.8571
## Prevalence : 0.5652
## Detection Rate : 0.5217
## Detection Prevalence : 0.6957
## Balanced Accuracy : 0.7615
##
## 'Positive' Class : PH
##
LASSO.perfF<- predict(LASSO.min, Female[-1], type= "prob")
LASSO.aucF<- pROC::auc(Female$group, LASSO.perfF[,2])
LASSO.aucCIF<- pROC::ci.auc(Female$group, LASSO.perfF[,2])
LASSO.aucF
## Area under the curve: 0.8538
LASSO.aucCIF
## 95% CI: 0.6809-1 (DeLong)
xgbpredictfinalshortM<- predict(xgb_tune_final_short, Male[,c("group",xgb_mirs$Feature)])
XGBInterimM<- confusionMatrix(xgbpredictfinalshortM, as.factor(Male$group), positive = "PH")
XGBInterimM
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 4 0
## PH 0 8
##
## Accuracy : 1
## 95% CI : (0.7354, 1)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : 0.007707
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 1.0000
## Prevalence : 0.6667
## Detection Rate : 0.6667
## Detection Prevalence : 0.6667
## Balanced Accuracy : 1.0000
##
## 'Positive' Class : PH
##
XGB.perfM<- predict(xgb_tune_final_short, Male[,c("group",xgb_mirs$Feature)], type= "prob")
XGB.aucM<- pROC::auc(Male$group, XGB.perfM[,2])
XGB.aucCIM<- pROC::ci.auc(Male$group, XGB.perfM[,2])
## Warning in ci.auc.roc(roc = roc, ...): ci.auc() of a ROC curve with AUC == 1 is
## always 1-1 and can be misleading.
XGB.aucM
## Area under the curve: 1
XGB.aucCIM
## 95% CI: 1-1 (DeLong)
xgbpredictfinalshortF<- predict(xgb_tune_final_short, Female[,c("group",xgb_mirs$Feature)])
XGBInterimF<- confusionMatrix(xgbpredictfinalshortF, as.factor(Female$group), positive = "PH")
XGBInterimF
## Confusion Matrix and Statistics
##
## Reference
## Prediction HV PH
## HV 6 2
## PH 4 11
##
## Accuracy : 0.7391
## 95% CI : (0.5159, 0.8977)
## No Information Rate : 0.5652
## P-Value [Acc > NIR] : 0.06809
##
## Kappa : 0.4567
##
## Mcnemar's Test P-Value : 0.68309
##
## Sensitivity : 0.8462
## Specificity : 0.6000
## Pos Pred Value : 0.7333
## Neg Pred Value : 0.7500
## Prevalence : 0.5652
## Detection Rate : 0.4783
## Detection Prevalence : 0.6522
## Balanced Accuracy : 0.7231
##
## 'Positive' Class : PH
##
XGB.perfF<- predict(xgb_tune_final_short, Female[,c("group",xgb_mirs$Feature)], type= "prob")
XGB.aucF<- pROC::auc(Female$group, XGB.perfF[,2])
XGB.aucCIF<- pROC::ci.auc(Female$group, XGB.perfF[,2])
XGB.aucF
## Area under the curve: 0.7615
XGB.aucCIF
## 95% CI: 0.5481-0.975 (DeLong)
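Whether the sex-stratified AUCs differ can also be tested formally. Because the male and female subsets contain different patients, an unpaired DeLong comparison applies; a sketch for the random forest model (the same pattern would apply to the other models):
# Sketch: unpaired DeLong test comparing the RF AUC in males vs. females
roc.test(pROC::roc(Male$group, RF.perfM[,2]),
         pROC::roc(Female$group, RF.perfF[,2]),
         paired = FALSE)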
all.Boruta.perf<- predict(fit.Boruta, select(dataset, -group, -PHstatus, -AB, -filename), type= "prob")
all.rpart.perf<- predict(fit.caret.rpart, select(dataset, -group, -PHstatus, -AB, -filename), type= "prob")
all.LASSO.probs.min <- predict(LASSO.min, newdata=select(dataset, -group, -PHstatus, -AB, -filename), type = "prob")
all.xgbpreds<- predict(xgb_tune_final_short, select(dataset, xgb_mirs$Feature, -group, -PHstatus, -AB, -filename), type = "prob")
allprobs<- data.frame(patientID = dataset$filename, group = dataset$group, RandomForest = all.Boruta.perf$PH, rpart = all.rpart.perf$PH, LASSO = all.LASSO.probs.min$PH, XGB = all.xgbpreds$PH)
allprobs <- mutate(allprobs, Mean = rowMeans(allprobs[3:6]))
allprobs$EnsemblePreds <- ifelse(allprobs$Mean > 0.5, "PH", "HV")
#write.csv(allprobs, "~/Google Drive File Stream/My Drive/miRNA/modelpredictions.csv")
kable(allprobs) %>% kable_styling(full_width = TRUE)
patientID | group | RandomForest | rpart | LASSO | XGB | Mean | EnsemblePreds |
---|---|---|---|---|---|---|---|
773_1_3.txt | PH | 0.861 | 0.8666667 | 0.8610238 | 0.9266345 | 0.8788313 | PH |
761_2_4.txt | PH | 0.975 | 0.9230769 | 0.8864699 | 0.8823745 | 0.9167303 | PH |
988_2_1.txt | HV | 0.625 | 0.8666667 | 0.6270985 | 0.8060075 | 0.7311932 | PH |
987_2_1.txt | PH | 0.861 | 0.9230769 | 0.8465702 | 0.7811339 | 0.8529452 | PH |
991_1_4.txt | HV | 0.204 | 0.8666667 | 0.6789439 | 0.3211375 | 0.5176870 | PH |
762_1_1.txt | PH | 0.952 | 0.9230769 | 0.6710947 | 0.8816717 | 0.8569608 | PH |
991_1_2.txt | PH | 0.865 | 0.8666667 | 0.8497567 | 0.7917249 | 0.8432871 | PH |
765_1_1.txt | PH | 0.926 | 0.9230769 | 0.9355463 | 0.7913492 | 0.8939931 | PH |
987_2_2.txt | PH | 0.867 | 0.9230769 | 0.9681849 | 0.7909521 | 0.8873035 | PH |
988_2_2.txt | HV | 0.642 | 0.8666667 | 0.7828652 | 0.8272272 | 0.7796898 | PH |
988_2_3.txt | PH | 0.469 | 0.8666667 | 0.6646667 | 0.4601494 | 0.6151207 | PH |
991_1_1.txt | PH | 0.892 | 0.9230769 | 0.9760982 | 0.9143730 | 0.9263870 | PH |
991_2_4.txt | PH | 0.949 | 0.8666667 | 0.7041091 | 0.9094258 | 0.8573004 | PH |
990_2_4.txt | PH | 0.909 | 0.9230769 | 0.5705039 | 0.8755173 | 0.8195245 | PH |
988_2_4.txt | HV | 0.301 | 0.0000000 | 0.6179052 | 0.6803067 | 0.3998030 | HV |
988_1_1.txt | HV | 0.266 | 0.0454545 | 0.4856227 | 0.4597225 | 0.3141999 | HV |
986_1_1.txt | PH | 0.866 | 0.8666667 | 0.8318733 | 0.7475660 | 0.8280265 | PH |
988_1_2.txt | PH | 0.908 | 0.8666667 | 0.9557704 | 0.7759580 | 0.8765988 | PH |
981_1_3.txt | HV | 0.145 | 0.0000000 | 0.4265124 | 0.3136015 | 0.2212785 | HV |
986_1_3.txt | PH | 0.877 | 0.8666667 | 0.6068253 | 0.6847930 | 0.7588212 | PH |
994_2_4.txt | HV | 0.112 | 0.0454545 | 0.4433199 | 0.1719547 | 0.1931823 | HV |
988_1_3.txt | HV | 0.244 | 0.8666667 | 0.6279423 | 0.5507385 | 0.5723369 | PH |
756_1_4.txt | PH | 0.827 | 0.9230769 | 0.9729316 | 0.8753925 | 0.8996003 | PH |
766_1_3.txt | HV | 0.075 | 0.0454545 | 0.0754960 | 0.1578765 | 0.0884568 | HV |
773_1_4.txt | HV | 0.115 | 0.0454545 | 0.2784155 | 0.2450149 | 0.1709712 | HV |
780_2_1.txt | HV | 0.588 | 0.9230769 | 0.5069344 | 0.3914426 | 0.6023635 | PH |
993_2_4.txt | PH | 0.753 | 0.0454545 | 0.2654226 | 0.4854265 | 0.3873259 | HV |
756_1_1.txt | PH | 0.805 | 0.9230769 | 0.8110336 | 0.8562466 | 0.8488393 | PH |
987_1_2.txt | PH | 0.915 | 0.9230769 | 0.5369077 | 0.8458510 | 0.8052089 | PH |
990_1_4.txt | PH | 0.908 | 0.9230769 | 0.8860309 | 0.9222433 | 0.9098378 | PH |
991_1_3.txt | PH | 0.804 | 0.8666667 | 0.7985939 | 0.4786575 | 0.7369795 | PH |
990_1_3.txt | PH | 0.671 | 0.9230769 | 0.4652240 | 0.7577598 | 0.7042652 | PH |
986_2_1.txt | PH | 0.877 | 0.8666667 | 0.7292580 | 0.7152265 | 0.7970378 | PH |
759_2_4.txt | PH | 0.918 | 0.9230769 | 0.7153382 | 0.7805382 | 0.8342383 | PH |
760_1_1.txt | PH | 0.888 | 0.9230769 | 0.9594336 | 0.7051297 | 0.8689100 | PH |
990_1_1.txt | PH | 0.799 | 0.8666667 | 0.7554185 | 0.8007538 | 0.8054598 | PH |
756_2_2.txt | PH | 0.621 | 0.8666667 | 0.7759468 | 0.6305977 | 0.7235528 | PH |
989_2_4.txt | PH | 0.859 | 0.8000000 | 0.4648375 | 0.6327240 | 0.6891404 | PH |
986_1_2.txt | PH | 0.906 | 0.8666667 | 0.8104660 | 0.7922019 | 0.8438336 | PH |
991_2_3.txt | PH | 0.858 | 0.9230769 | 0.8211853 | 0.8018932 | 0.8510389 | PH |
989_2_2.txt | PH | 0.713 | 0.8666667 | 0.7855566 | 0.7922395 | 0.7893657 | PH |
989_2_1.txt | PH | 0.916 | 0.9230769 | 0.6768916 | 0.8776831 | 0.8484129 | PH |
760_1_3.txt | PH | 0.865 | 0.8000000 | 0.6817949 | 0.7203315 | 0.7667816 | PH |
760_1_4.txt | PH | 0.865 | 0.8000000 | 0.9294178 | 0.6711552 | 0.8163932 | PH |
989_1_3.txt | PH | 0.786 | 0.8666667 | 0.5553606 | 0.6654003 | 0.7183569 | PH |
756_2_3.txt | PH | 0.844 | 0.8666667 | 0.6277201 | 0.6890227 | 0.7568524 | PH |
760_2_1.txt | PH | 0.710 | 0.0454545 | 0.6660852 | 0.7215491 | 0.5357722 | PH |
994_2_2.txt | HV | 0.047 | 0.0454545 | 0.0970803 | 0.1024557 | 0.0729976 | HV |
989_1_2.txt | PH | 0.885 | 0.8666667 | 0.8363723 | 0.7992688 | 0.8468269 | PH |
770_1_2.txt | PH | 0.877 | 0.9230769 | 0.8527485 | 0.9214918 | 0.8935793 | PH |
770_1_3.txt | PH | 0.981 | 0.9230769 | 0.9623633 | 0.9516933 | 0.9545334 | PH |
761_1_1.txt | PH | 0.964 | 0.9230769 | 0.9188469 | 0.8966982 | 0.9256555 | PH |
779_1_2.txt | PH | 0.713 | 0.9230769 | 0.5425231 | 0.7151947 | 0.7234487 | PH |
779_1_4.txt | PH | 0.631 | 0.9230769 | 0.2968497 | 0.5210212 | 0.5929869 | PH |
762_2_4.txt | PH | 0.978 | 0.9230769 | 0.9306949 | 0.9167427 | 0.9371286 | PH |
761_1_2.txt | PH | 0.980 | 0.9230769 | 0.8585149 | 0.9279700 | 0.9223905 | PH |
761_1_3.txt | PH | 0.950 | 0.9230769 | 0.8871988 | 0.9021694 | 0.9156113 | PH |
761_1_4.txt | PH | 0.987 | 0.9230769 | 0.9567790 | 0.9192775 | 0.9465334 | PH |
761_2_1.txt | PH | 0.976 | 0.9230769 | 0.7876303 | 0.8600428 | 0.8866875 | PH |
988_1_4.txt | PH | 0.896 | 0.8666667 | 0.7124172 | 0.7363076 | 0.8028479 | PH |
761_2_3.txt | PH | 0.922 | 0.9230769 | 0.7181012 | 0.8198347 | 0.8457532 | PH |
986_1_4.txt | PH | 0.540 | 0.0454545 | 0.3144224 | 0.5690590 | 0.3672340 | HV |
757_2_3.txt | HV | 0.063 | 0.0454545 | 0.1894561 | 0.1470265 | 0.1112343 | HV |
994_1_3.txt | HV | 0.240 | 0.0454545 | 0.0996630 | 0.2303311 | 0.1538622 | HV |
994_1_4.txt | HV | 0.251 | 0.0454545 | 0.1631164 | 0.2609853 | 0.1801391 | HV |
980_1_1.txt | HV | 0.327 | 0.0454545 | 0.2625634 | 0.2860165 | 0.2302586 | HV |
757_2_4.txt | HV | 0.147 | 0.0454545 | 0.2152335 | 0.2754282 | 0.1707790 | HV |
980_1_2.txt | HV | 0.251 | 0.0454545 | 0.3797412 | 0.4075463 | 0.2709355 | HV |
980_1_3.txt | HV | 0.119 | 0.0454545 | 0.3119756 | 0.1546872 | 0.1577793 | HV |
982_2_3.txt | HV | 0.147 | 0.0454545 | 0.6152616 | 0.2906246 | 0.2745852 | HV |
982_2_4.txt | HV | 0.085 | 0.0454545 | 0.4999167 | 0.3045401 | 0.2337278 | HV |
994_1_1.txt | HV | 0.177 | 0.0454545 | 0.5773281 | 0.3858711 | 0.2964134 | HV |
994_1_2.txt | HV | 0.234 | 0.8000000 | 0.2296237 | 0.3995842 | 0.4158020 | HV |
982_1_3.txt | HV | 0.489 | 0.0454545 | 0.4489525 | 0.4375283 | 0.3552338 | HV |
982_1_4.txt | HV | 0.121 | 0.0454545 | 0.4093854 | 0.2147404 | 0.1976451 | HV |
757_2_2.txt | HV | 0.073 | 0.0454545 | 0.1239309 | 0.1248079 | 0.0917983 | HV |
758_1_1.txt | HV | 0.124 | 0.0454545 | 0.3395365 | 0.2105049 | 0.1798740 | HV |
982_2_1.txt | HV | 0.022 | 0.0454545 | 0.1881976 | 0.1196183 | 0.0938176 | HV |
758_1_2.txt | HV | 0.298 | 0.0454545 | 0.3030106 | 0.2726333 | 0.2297746 | HV |
758_1_3.txt | HV | 0.240 | 0.0454545 | 0.2883087 | 0.4288145 | 0.2506444 | HV |
982_1_1.txt | HV | 0.420 | 0.0454545 | 0.5840516 | 0.4051780 | 0.3636710 | HV |
982_1_2.txt | HV | 0.070 | 0.0454545 | 0.0989643 | 0.1395528 | 0.0884929 | HV |
982_2_2.txt | HV | 0.012 | 0.0454545 | 0.0852925 | 0.1085789 | 0.0628315 | HV |
994_2_3.txt | HV | 0.112 | 0.0454545 | 0.2592133 | 0.2491712 | 0.1664598 | HV |
756_1_3.txt | PH | 0.988 | 0.9230769 | 0.9497630 | 0.9242942 | 0.9462835 | PH |
989_1_1.txt | PH | 0.879 | 0.9230769 | 0.7748558 | 0.8681318 | 0.8612661 | PH |
994_2_1.txt | PH | 0.850 | 0.8000000 | 0.9166502 | 0.7574031 | 0.8310133 | PH |
769_1_2.txt | HV | 0.032 | 0.0454545 | 0.0411933 | 0.1430631 | 0.0654277 | HV |
766_2_2.txt | HV | 0.204 | 0.0000000 | 0.4386853 | 0.4356677 | 0.2695882 | HV |
780_2_2.txt | HV | 0.151 | 0.9230769 | 0.3142257 | 0.2625172 | 0.4127049 | HV |
780_1_2.txt | HV | 0.079 | 0.0454545 | 0.2048144 | 0.1127005 | 0.1104923 | HV |
780_1_1.txt | HV | 0.396 | 0.9230769 | 0.4244571 | 0.4439179 | 0.5468630 | PH |
768_1_1.txt | HV | 0.108 | 0.0454545 | 0.1547524 | 0.2058445 | 0.1285129 | HV |
773_2_1.txt | HV | 0.942 | 0.9230769 | 0.9280066 | 0.8514320 | 0.9111289 | PH |
780_2_4.txt | HV | 0.302 | 0.9230769 | 0.6583041 | 0.6602639 | 0.6359112 | PH |
767_2_3.txt | HV | 0.492 | 0.0454545 | 0.2876439 | 0.5135296 | 0.3346570 | HV |
770_2_2.txt | PH | 0.893 | 0.9230769 | 0.9282741 | 0.8327190 | 0.8942675 | PH |
766_1_4.txt | PH | 0.858 | 0.9230769 | 0.4341256 | 0.6155569 | 0.7076899 | PH |
774_1_1.txt | PH | 0.982 | 0.9230769 | 0.9660126 | 0.9576942 | 0.9571959 | PH |
771_1_1.txt | PH | 0.975 | 0.9230769 | 0.8549635 | 0.9221686 | 0.9188022 | PH |
768_1_3.txt | PH | 0.875 | 0.8666667 | 0.8144125 | 0.8513563 | 0.8518589 | PH |
766_2_3.txt | PH | 0.958 | 0.9230769 | 0.7984063 | 0.8189564 | 0.8746099 | PH |
779_2_4.txt | PH | 0.953 | 0.9230769 | 0.9452917 | 0.9173815 | 0.9346875 | PH |
766_1_2.txt | PH | 0.861 | 0.9230769 | 0.7236454 | 0.6504859 | 0.7895521 | PH |
767_2_4.txt | PH | 0.255 | 0.8000000 | 0.2361473 | 0.2724249 | 0.3908930 | HV |
774_1_3.txt | PH | 0.401 | 0.9230769 | 0.3690218 | 0.5977038 | 0.5727006 | PH |
780_2_3.txt | PH | 0.853 | 0.9230769 | 0.7862459 | 0.6164631 | 0.7946965 | PH |
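The table above covers every array in the dataset, i.e. training (set A) and validation (set B) combined, so any accuracy computed from it is a resubstitution-style estimate rather than an independent test. With that caveat, the ensemble calls could be cross-tabulated against the true groups as a quick check:
# Sketch: ensemble calls vs. true group over all samples (optimistic, since set A
# was used to train the underlying models)
table(Ensemble = allprobs$EnsemblePreds, Truth = allprobs$group)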