-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_preprocessing.R
65 lines (43 loc) · 1.35 KB
/
data_preprocessing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# ---- Load the raw data ----
# "SFBay.csv" is read from the current working directory; fields are
# separated by ";".
data <- read.csv("SFBay.csv", sep = ";")

# Print the number of missing values in each column.
colSums(is.na(data))

# ---- Drop sparse columns ----
# Per the original author's note, each of these columns has more than 90% of
# its values missing. They are removed first so that the row-wise NA filter
# below does not discard nearly every row.
sparse_columns <- c(
  "Discrete.Chlorophyll",
  "Chlorophyll.a.a.PHA",
  "Discrete.Oxygen",
  "Discrete.SPM",
  "Measured.Extinction.Coefficient",
  "Calculated.Extinction.Coefficient",
  "Nitrite",
  "Nitrate...Nitrite",
  "Ammonium",
  "Phosphate",
  "Silicate"
)
# Names are matched exactly; a listed column that is absent is simply ignored.
data <- data[!(names(data) %in% sparse_columns)]

# ---- Drop incomplete rows ----
# Keep only the rows that have no NA in any remaining column.
SFB_data <- na.omit(data)

# ---- Split the timestamp into a calendar date ----
# as.Date() keeps only the date part of TimeStamp; the result is stored in a
# new "Date" column.
SFB_data$Date <- as.Date(SFB_data$TimeStamp)

# Preview while both TimeStamp and Date are still present.
head(SFB_data)

# The original timestamp is no longer needed once Date exists.
SFB_data$TimeStamp <- NULL

# ---- Split the date into Year / Month / Day ----
# Work on a separately named copy of the cleaned data.
SFB_DATA <- data.frame(SFB_data)

# Date is already of class Date, so it can be formatted directly. Each new
# column holds character strings ("%Y" four-digit year, "%m" and "%d"
# zero-padded two-digit month and day).
SFB_DATA$Year <- format(SFB_DATA$Date, "%Y")
SFB_DATA$Month <- format(SFB_DATA$Date, "%m")
SFB_DATA$Day <- format(SFB_DATA$Date, "%d")

# Year, Month and Day now carry the same information as Date.
SFB_DATA$Date <- NULL

# Preview the final table.
head(SFB_DATA)

# ---- Save the result ----
# NOTE(review): the destination is an absolute, machine-specific path; confirm
# it exists wherever this script is run.
write.csv(
  SFB_DATA,
  file = "C:/Users/user/Documents/SFB_DATA.csv",
  row.names = FALSE
)