# FINAL_Gaussian and Ising Graphs.R
#-------------------------- Gaussian Model --------------------------------#
# Builds a year x industry matrix from the BEA "Wages and Salaries by
# Industry" CSV, selects a Gaussian graphical model over the industries with
# GGMselect, and plots the selected graph with igraph.
library(GGMselect)
library(igraph)

# Read by full path rather than setwd(), so running this section does not
# change the working directory of the session.
wage_dir <- "/Users/BlackHawk/Desktop/TheBigD"
wage_table <- read.csv(
  file.path(wage_dir, "Wages and Salaries by Industry (USA BEA).csv"),
  header = FALSE,
  stringsAsFactors = FALSE
)

# Row 5 of the raw sheet is used as the header row and the data starts at
# row 6; the first raw column is dropped in both cases.
preserve_col_names <- wage_table[5, 2:ncol(wage_table)]
wage_table <- wage_table[6:nrow(wage_table), 2:ncol(wage_table)]
colnames(wage_table) <- preserve_col_names
rownames(wage_table) <- seq_len(nrow(wage_table)) # normalize row numbers

# Rows to keep: drop headers and keep only the rows with the
# *same level of subdivisions*.
keep_rows <- c(
  5, 6, 8, 9, 10, 14, 26, 36, 37, 39:43, 44:51, 53, 54, 55, 56,
  58:61, 63, 64, 66, 67, 68, 71, 72, 74:78, 80, 81,
  83, 84, 88, 91, 93, 96
)

# stringsAsFactors = FALSE keeps the industry-name column as plain character,
# which makes it easy to edit below.
sub_file <- data.frame(wage_table[keep_rows, ], stringsAsFactors = FALSE)

# Disambiguate the two "durable goods" rows and the two "general government"
# rows, which would otherwise share a label.
sub_file[8:9, 1] <- c(
  "Wholesale Trade Durable goods",
  "Wholesale Trade Nondurable goods"
)
sub_file[c(47, 49), 1] <- c("Federal General Gov", "State and Local General Gov")
colnames(sub_file) <- preserve_col_names

# The first column holds the industry names; they become the row names.
names_of_rows <- sub_file[, 1]

# Drop the name column and convert every remaining column to numeric so the
# values can be used in computations. vapply() converts column by column and
# returns a numeric matrix that keeps the column names.
sub_file <- vapply(
  sub_file[, -1, drop = FALSE],
  as.numeric,
  numeric(length(names_of_rows))
)
rownames(sub_file) <- names_of_rows

# After transposing, rows are years and columns are industries.
# Data is now short and wide -> high dimensional and potentially sparse.
year_x_industry <- t(sub_file)

# Package details:
# https://cran.r-project.org/web/packages/GGMselect/vignettes/Notice.pdf
# Notice on pg. 4, "The Lasso-And family GbLA derives from the estimation
# procedure proposed by Meinshausen and Bühlmann". That's exactly what we want.
graph_computation <- selectFast(year_x_industry, K = 2, family = "LA")

# graph_computation$LA$G is the matrix of the selected graph; treat it as an
# adjacency matrix to build an igraph object.
graph_to_plot <- graph_from_adjacency_matrix(
  graph_computation$LA$G,
  mode = "undirected"
)

# Label vertices with short numeric ids and keep an id -> industry lookup
# table, since the full industry names are too long to read on the plot.
vertex_ids <- seq_along(names_of_rows)
name_dictionary <- data.frame(temp = vertex_ids, names_of_rows = names_of_rows)
V(graph_to_plot)$name <- vertex_ids
plot(graph_to_plot)

# View() needs an interactive session; print the lookup table otherwise.
if (interactive()) {
  View(name_dictionary)
} else {
  print(name_dictionary)
}
#--------------------------------------------------------------------------#
library(plyr)

# Read by full path rather than setwd(), so running this section does not
# change the working directory of the session.
survey_dir <- "/Users/BlackHawk/Desktop/TheBigD/VOTER Survey"
survey_data <- read.csv(
  file.path(survey_dir, "VOTER_Survey_December16_Release1.csv")
)

#-------------------------- Ising Model --------------------------------#
# ----- Matrix of voter preferences for candidates converted to numeric values ---
candidates_favor <- c(
  "fav_trump_2016", "fav_cruz_2016", "fav_ryan_2016",
  "fav_romn_2016", "fav_obama_2016", "fav_hrc_2016",
  "fav_sanders_2016", "fav_rubio_2016"
)
opinions <- c(
  "Very favorable", "Somewhat favorable",
  "Somewhat unfavorable", "Very unfavorable",
  "Don't know", ""
)
# Binary coding (favorable = 1, everything else = 0) so the data fit an
# Ising model estimated with logistic regressions.
numeric_scale <- c(1, 1, 0, 0, 0, 0)

# Recode the answers listed in `opinions` to the matching entry of
# `numeric_scale`; any answer not listed in `opinions` is left as it is.
favorable_opinion_matrix <- mapvalues(
  as.matrix(survey_data[, candidates_favor]),
  from = opinions,
  to = numeric_scale
)
states <- survey_data["inputstate_2016"]

# The recoded matrix is still character; convert every column to numeric
# (respondents stay in rows, candidates in columns).
favorable_opinion_matrix <- apply(favorable_opinion_matrix, 2, as.numeric)

library(IsingFit) # Great Package!
# Can see package here: https://cran.r-project.org/web/packages/IsingFit/IsingFit.pdf
# Looking at the Reference list (pg 5), we see "Ravikumar, P., Wainwright, M. J., & Lafferty, J. D"
# Which is the paper we're using

#----- try candidates then states -------------#
# c = nrow(unique(states)) #states is a vector, so use nrow, but will make it columns in following matrix
# r = nrow(favorable_opinion_matrix)
# zero_mat = matrix(rep(0, r*c),r,c)
# colnames(zero_mat) = t(unique(states))
#
# states = as.matrix(states["inputstate_2016"])
# mat_cor = cbind(1:r, match(states, unique(states)) )
#
# zero_mat[mat_cor] = 1
#
# new_mat = cbind(favorable_opinion_matrix, zero_mat)

#------ Just the candidates
# plot = TRUE also shows the plot generated by the package itself.
IsingResults <- IsingFit(favorable_opinion_matrix, plot = TRUE)

# weiadj holds real-valued edge weights. Without `weighted`,
# graph_from_adjacency_matrix() reads each entry as a number of edges, so
# pass the 0/1 support instead: an edge wherever the weight is nonzero.
ising_adjacency <- (IsingResults$weiadj != 0) * 1
graph_to_plot <- graph_from_adjacency_matrix(ising_adjacency, mode = "undirected")
plot(graph_to_plot)
#---- Look at iGraph -----#
# Some other network plotting libraries/APIs include 'networkD3' and 'network'.
# igraph is by far the most extensive library for working with graphs:
# http://igraph.org/r/doc/
library(igraph)

# Plot the Ising network fitted above with igraph, labelled by candidate.
# graph_computation (the Gaussian model) was fitted with family = "LA" only,
# so it has no C01.LA component, and its vertices are industries rather than
# candidates; the candidate labels belong to the Ising fit.
# IsingResults$weiadj holds real-valued edge weights, so build the graph from
# its 0/1 support (an edge wherever the weight is nonzero).
ising_adjacency <- (IsingResults$weiadj != 0) * 1

# Fail early if the fitted network does not have one node per candidate;
# the labels below would otherwise be misassigned.
stopifnot(ncol(ising_adjacency) == length(candidates_favor))

graph_to_plot <- graph_from_adjacency_matrix(ising_adjacency, mode = "undirected")
# V(graph) returns all the vertices of the graph; name them after the candidates.
V(graph_to_plot)$name <- candidates_favor
plot(graph_to_plot)
#--------------------------------------------------------------------------#
#---------------------- Same model but with 50 states ---------------------#
#--------------------------------------------------------------------------#
# ---------- Other Graphing Libraries/APIs ---------#
# -- Network D3
#### Must first install dependencies, run the following:
#install.packages("magrittr", dependencies = TRUE)
#install.packages("igraph", dependencies = TRUE)
#install.packages("networkD3", dependencies = TRUE)
#library(networkD3)
#simpleNetwork(as.data.frame(graph_computation$C01.LA$G))
# -- network
# library(network)
# Gr <- simulateGraph(p = ncol(favorable_opinion_matrix), eta = 0.11) #generate random graph
# gV <- network(Gr$G) # gV is now matrix of the graph generated
# a <- plot(gV, usearrows = FALSE) #plot the random graph
# plot_me <- network(graph_computation$C01.LA$G)
# plot(plot_me, coord = a, usearrows = FALSE) #plot the graph, using coord = a gives form
# -- gmm
#library(gmm)