This repository has been archived by the owner on Jun 12, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathstep7_prepareData.R
63 lines (40 loc) · 2.25 KB
/
step7_prepareData.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
library(dplyr)
library(data.table)
# Build datasetPrepared from datasetNatVisiblis by keeping only the columns
# listed below, in the order given. The source columns `ast` and `asp` are
# renamed on the way through to Visiblis_Title and Visiblis_Page; every other
# column keeps its original name.
datasetPrepared <- datasetNatVisiblis %>%
  select(
    Keyword,
    URL,
    `Status Code`,
    `Title 1`, `Title 1 Length`,
    `H1-1`, `H1-1 length`,
    `H1-2`, `H1-2 length`,
    `H2-2`, `H2-2 length`,
    `Word Count`,
    `Text Ratio`,
    `Inlinks`,
    `Outlinks`,
    `Response Time`,
    ExtBackLinks,
    RefDomains,
    RefIPs,
    RefSubNets,
    CitationFlow,
    TrustFlow,
    TrustMetric,
    TopicalTrustFlow_Topic_0, TopicalTrustFlow_Value_0,
    TopicalTrustFlow_Topic_1, TopicalTrustFlow_Value_1,
    TopicalTrustFlow_Topic_2, TopicalTrustFlow_Value_2,
    # select() renames with new_name = old_name while keeping the position.
    Visiblis_Title = ast,
    Visiblis_Page = asp,
    isTopTen
  )
# Keep only the rows whose `Status Code` equals 200, then export the result.
# write.csv2 is called with its defaults, so the file is semicolon-separated
# with a decimal comma and row names are written as well.
datasetCleaned <- datasetPrepared %>%
  filter(`Status Code` == 200)
write.csv2(datasetCleaned, file = "./dataset/dataset.csv")