-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path.Rhistory
92 lines (92 loc) · 1.94 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Session setup: make sure the needed packages are installed, attach them,
# run spark_install() for Spark 1.6.2 and open a local Spark connection.

# requireNamespace() only checks that a package is available; attaching is
# left to the library() calls below, which stop with an error if a package
# is still missing after the install attempt.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
if (!requireNamespace("sparklyr", quietly = TRUE)) {
  install.packages("sparklyr")
}
library(dplyr)
library(data.table)
library(sparklyr)

spark_install(version = "1.6.2")
sc <- spark_connect(master = "local")
# If you have port conflicts, try:
# config <- spark_config()
# config$sparklyr.gateway.port <- 5454
# sc <- spark_connect(master = "local", config = config)

# Print the connection, list the tables it exposes, open the Spark web UI.
sc
src_tbls(sc)
spark_web(sc)
# Get data/cfs_2012_pumf_csv.txt into Spark by two routes, timing each one.

# Route 1: read the file into R with fread(), then copy the result to Spark
# as table "ship".
system.time({
  ship <- fread("data/cfs_2012_pumf_csv.txt")
  # TRUE instead of T: T is an ordinary variable and can be reassigned.
  ship_sp <- copy_to(sc, ship, "ship", overwrite = TRUE)
})
src_tbls(sc)

# Route 2: let Spark read the same file directly as table "ship2".
system.time(
  ship2_sp <- spark_read_csv(
    sc, "ship2",
    "data/cfs_2012_pumf_csv.txt"
  )
)

# Quick look at both tables and at the last few Spark log lines.
colnames(ship_sp)
tbl_vars(ship2_sp)
head(ship2_sp) %>% View
ship_sp %>% count(ORIG_STATE)
head(ship_sp)
spark_log(sc, n = 3)
# Fit the same linear model in Spark and in base R, time both fits, and
# compare the resulting objects.

# Keep only the columns whose names contain "SHIPMT".
ship_values <- select(ship_sp, contains("SHIPMT"))
names(ship_values)
head(ship_values)
ship_values %>% head()

# Spark fit: SHIPMT_VALUE on every remaining column except SHIPMT_ID.
system.time({
  lm_sp <- ml_linear_regression(
    select(ship_values, -SHIPMT_ID),
    SHIPMT_VALUE ~ .
  )
})

# R fit: same column selection on the in-memory table, then lm().
system.time({
  dat <- select(select(ship, contains("SHIPMT")), -SHIPMT_ID)
  lm_R <- lm(SHIPMT_VALUE ~ ., data = dat)
})

lm_sp %>% class()
lm_R %>% class()

cat("\nSpark LM Summary Output\n")
lm_sp %>% summary()
cat("\n\nR LM Summary Output\n")
lm_R %>% summary()

lm_sp %>% names()
# plot(lm_R)
lm_sp %>% plot()
plot(summary(lm_sp))
# Scratch checks: basic R values, copying a small data frame to Spark, and
# re-inspecting the shipment tables.
a <- c(1L, 2L)
a
list(a = 1, b = 2)
# The history also contained `data.frames(...)` here, a misspelling of
# data.frame() that fails with "could not find function"; only the working
# call is kept.
data.frame(a = 1:10, b = 2:11)
TRUE
df <- data.frame(a = 1:10, b = 2:11)
df
copy_to(sc, df)

names(ship_sp)
colnames(ship_sp)
head(ship_sp)
ship %>% class
ship

# library() rather than require(): stop right here if tidyr is missing
# instead of failing later at nest().
library(tidyr)
# A bare `ship2` was evaluated here, but no object of that name is assigned
# anywhere in the visible history; the handle for Spark table "ship2" is
# ship2_sp, which is printed below.
df
ship2_sp
ship2_sp %>% nest(DEST_MA)
db_drop_table(sc, "ship2")

ship
ship %>% str()
ship %>% class
ship_sp
# Cache Spark table to memory
tbl_cache(sc, "ship")
rm(a)
# Reset the workspace and repeat the package setup from a clean state.
spark_install()
# NOTE(review): this removes every object in the global environment, which
# includes any Spark connection handle created earlier in the session.
rm(list = ls())

# requireNamespace() only checks that a package is available; attaching is
# left to the library() calls below, which stop with an error if a package
# is still missing after the install attempt.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
if (!requireNamespace("sparklyr", quietly = TRUE)) {
  install.packages("sparklyr")
}
library(dplyr)
library(data.table)
library(sparklyr)
spark_install()