-
Notifications
You must be signed in to change notification settings - Fork 119
/
Copy pathglass_LDA.R
63 lines (47 loc) · 1.61 KB
/
glass_LDA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#### ******* Forensic Glass ****** ####
library(MASS) ## a library of example datasets
library(tidyverse)
library(mvtnorm)
data(fgl) ## loads the data into R; see help(fgl)
# Recall:
# the target variable is type:
# WinF: float glass window
# WinNF: non-float window
# Veh: vehicle window
# Con: container (bottles)
# Tabl: tableware
# Head: vehicle headlamp
# Base ggplot object bound to the forensic-glass data; layers are added below
p0 <- ggplot(data = fgl)
# Scatter of Mg against RI, drawn in a separate panel for each glass type
p0 +
  geom_point(mapping = aes(x = RI, y = Mg)) +
  facet_wrap(~ type)
# Estimate (mu, Sigma) of the (RI, Mg) pair within two of the glass types.
# Non-float window glass:
X_WinNF <- fgl %>%
  filter(type == 'WinNF') %>%
  dplyr::select(RI, Mg)
mu_WinNF <- colMeans(X_WinNF)
Sigma_WinNF <- cov(X_WinNF)

# Vehicle window glass:
X_veh <- fgl %>%
  filter(type == 'Veh') %>%
  dplyr::select(RI, Mg)
mu_veh <- colMeans(X_veh)
Sigma_veh <- cov(X_veh)

# Score a single observation under each fitted Gaussian and compare the two
# log-densities against the type actually recorded for that row.
X_all <- dplyr::select(fgl, RI, Mg)
y_all <- fgl$type
probe_row <- 147
x_probe <- X_all[probe_row, ]
x_probe
dmvnorm(x_probe, mean = mu_WinNF, sigma = Sigma_WinNF, log = TRUE)
dmvnorm(x_probe, mean = mu_veh, sigma = Sigma_veh, log = TRUE)
y_all[probe_row]
# Fit LDA on a random training split using only the two features (RI, Mg),
# then evaluate it on the held-out rows.
# Fix the RNG seed so the split (and therefore the reported accuracy) is the
# same on every run; change or remove the seed to draw a different split.
set.seed(1)
n_train <- 180
# Sample row indices from the actual row count rather than a hard-coded 214
train_set <- sample.int(nrow(fgl), n_train, replace = FALSE)
lda1 <- lda(type ~ RI + Mg, data = fgl[train_set, ])
# summary() has no lda-specific method and only tabulates the list components;
# printing the fit shows the priors, group means and discriminant coefficients
lda1
lda1$means
# Predict once on the held-out rows and reuse the result below
pred1 <- predict(lda1, fgl[-train_set, ])
pred1$posterior
pred1$class
# Rows: recorded type; columns: predicted type
confusion <- table(fgl$type[-train_set], pred1$class)
# Out-of-sample accuracy: share of held-out rows on the diagonal
sum(diag(confusion)) / sum(confusion)
## Same exercise, now with every available predictor instead of just (RI, Mg)
lda2 <- lda(type ~ ., data = fgl[train_set, ])
lda2$means
# Score the held-out rows once and reuse the result below
pred2 <- predict(lda2, fgl[-train_set, ])
pred2$posterior
pred2$class
confusion2 <- table(y_all[-train_set], pred2$class)
# Share of held-out rows that land on the diagonal of the confusion table
sum(diag(confusion2)) / sum(confusion2)