-
Notifications
You must be signed in to change notification settings - Fork 0
/
Chunk02-Download the datasets, GPL200.R
116 lines (66 loc) · 2.6 KB
/
Chunk02-Download the datasets, GPL200.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
################################################################################
# &&&....&&& % Project: Identification of HKG candidates in C. elegans #
# &&&&&&..&&&&&& % Author: Bo Li, Jingxin Tao, Youjin Hao #
# &&&&&&&&&&&&&& % Date: Dec. 24th, 2019 #
# &&&&&&&&&&&& % #
# &&&&&&&& % Environment: R version 3.5.3; #
# &&&& % Platform: x86_64-pc-linux-gnu (64-bit) #
# & % #
################################################################################
### ****************************************************************************
### code chunk number 02: Download the datasets, GPL200.
### ****************************************************************************
### Step-01. Obtain all GSE datasets with GPL200.
library(GEOquery)
# Create the download folder only when it is missing, so that re-running the
# script does not raise an "already exists" warning, then work inside it.
if (!dir.exists("GSE datasets about C.elegans (GPL200)")) {
  dir.create("GSE datasets about C.elegans (GPL200)")
}
setwd("GSE datasets about C.elegans (GPL200)")
### get the GSE Series Number.
gpl <- getGEO("GPL200") # C.elegans.
gpl_gse <- gpl@header$series_id
### download the GSE Series datasets.
gse_download <- gpl_gse
# gse_non collects every series accession whose supplementary files could not
# be obtained. It stays NULL when every download succeeds.
gse_non <- NULL
for (i in gse_download) {
  test <- try(
    getGEOSuppFiles(i, makeDirectory = TRUE, baseDir = getwd(),
                    fetch_files = TRUE, filter_regex = NULL),
    silent = TRUE
  )
  # try() hands back an object of class "try-error" when the call fails; it
  # is never NULL in that case, so errors must be tested for explicitly.
  if (inherits(test, "try-error")) {
    message("Download failed for ", i, ": ",
            conditionMessage(attr(test, "condition")))
  }
  # A NULL result is what the original check treated as "nothing downloaded"
  # (presumably no supplementary files on GEO -- confirm against GEOquery).
  if (is.null(test) || inherits(test, "try-error")) {
    gse_non <- c(gse_non, i)
  }
}
# Snapshot of the folder taken before the CSV below is written, so it lists
# only what the downloads created.
gse_downloaded <- dir()
gse_non
# save(gse_non, file = "gse_non.RData")
write.csv(gse_non, file = "undownloaded_datasets.csv")
# Series that have no entry in the download folder. setdiff() is used because
# negative indexing with match() returns nothing when no folder matched and
# fails outright when the folder holds an entry that is not a series id.
setdiff(gpl_gse, gse_downloaded)
# Fallback: for the series recorded in gse_non, try to fetch the processed
# data from ArrayExpress instead.
# Only create the nested output folders when they are missing, so that a
# re-run does not warn.
if (!dir.exists("Other_processed_data")) {
  dir.create("Other_processed_data")
}
setwd("Other_processed_data")
if (!dir.exists("Expression datasets")) {
  dir.create("Expression datasets")
}
setwd("Expression datasets")
library(ArrayExpress)
# gse_non2 collects the ArrayExpress accessions that could not be fetched.
gse_non2 <- NULL
# Turn each "GSE<n>" into "E-GEOD-<n>". sprintf() returns an empty vector for
# an empty gse_non, whereas paste() would fabricate the accession "E-GEOD-".
gse_non_AE <- sprintf("E-GEOD-%s", gsub("GSE", "", gse_non))
for (m in gse_non_AE) {
  dat <- try(getAE(m, type = "processed"), silent = TRUE)
  # A failed call surfaces as a "try-error" object rather than NULL, so both
  # outcomes are checked. The accession recorded is the current one (m), not
  # a variable left over from an earlier loop.
  if (is.null(dat) || inherits(dat, "try-error")) {
    gse_non2 <- c(gse_non2, m)
  }
}
gse_non2
# Keep only the entries whose name contains "processed-data" and delete the
# rest. The listing is taken once and indexed with seq_along(), so an empty
# folder yields nothing to delete instead of the bogus indices c(1, 0).
ae_files <- dir()
del_no <- setdiff(seq_along(ae_files), grep("processed-data", ae_files))
file.remove(ae_files[del_no])
# Step back to "Other_processed_data" and collect the array design files in
# their own folder (created only when missing, to avoid a re-run warning).
setwd("..")
if (!dir.exists("array_design")) {
  dir.create("array_design")
}
setwd("array_design")
# Re-download the first ArrayExpress accession. Everything except the
# "A-AFFY-" files is deleted below, so this is presumably done only to obtain
# the array design file -- TODO confirm.
# Skip the download when there is no accession to ask for.
if (length(gse_non_AE) > 0 && !is.na(gse_non_AE[1])) {
  getAE(gse_non_AE[1], type = "processed")
} else {
  message("No ArrayExpress accession available; ",
          "skipping the array design download.")
}
# Delete every entry whose name does not contain "A-AFFY-". seq_along() keeps
# the index set empty for an empty folder, where 1:length() gives c(1, 0).
design_files <- dir()
del_no <- setdiff(seq_along(design_files), grep("A-AFFY-", design_files))
file.remove(design_files[del_no])
dir()
# A copy of the list of series not obtained from GEO is written next to the
# array design files.
write.csv(gse_non, file = "gse_non.csv")
# Climb two levels, back to the folder the GEO downloads were written to.
setwd("..")
setwd("..")
# source("http://bioconductor.org/biocLite.R")
# biocLite("ArrayExpress")
# End.