-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexcel_cleaner.R
72 lines (57 loc) · 2.35 KB
/
excel_cleaner.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Read in excel files
# output a nice csv
rm(list = ls())
library(readxl)
library(tidyverse)
setwd("/Users/skycope/Documents/UCT/Stats\ Honours/Project/Data\ and\ code")
# Between 2019-11-04 and 2019-11-24 the machine wasn't working
# I set these observations to NA (were recorded as 0)
file1 = read_xls("Cape Town aug19-feb20.xls", sheet = 4, skip = 2) %>%
filter(Category == 'Grass') %>%
select(-1:-4) %>%
t() %>%
data.frame() %>%
mutate(pollen_count = X1 + X2 + X3 + X4) %>%
select(pollen_count) %>%
mutate(date = seq(as.Date("2019-08-12"), as.Date("2020-02-09"), by = 1)) %>%
mutate(pollen_count =
ifelse(date >= as.Date("2019-11-04") & date <= as.Date("2019-11-24"), NA, pollen_count))
file2 = read_xls("Cape Town mar-sept 2019.xls", sheet = 4, skip = 2) %>%
filter(Category == 'Grass') %>%
select(-1:-4) %>%
t() %>%
data.frame() %>%
mutate(pollen_count = as.numeric(X1) + as.numeric(X2) + as.numeric(X3) + as.numeric(X4)) %>%
select(pollen_count) %>%
mutate(date = seq(as.Date("2019-03-12"), as.Date("2019-09-09"), by = 1))
# Unusable I think -- every observation is 0
#file3 = read_excel("SAAO July 2017- jan 2018.xlsx", sheet = 4, skip = 2) %>%
# filter(Category == 'Grass') %>%
# select(-1:-4) %>%
# t() %>%
# data.frame() %>%
# mutate(pollen_count = as.numeric(X1) + as.numeric(X2) + as.numeric(X3) + as.numeric(X4)) %>%
# select(pollen_count) %>%
# mutate(date = seq(as.Date("2017-07-26"), as.Date("2018-01-23"), by = 1))
file4 = read_xls("SAAO-sep2018-mar2019.xls", sheet = 4, skip = 2) %>%
filter(Category == 'Grass') %>%
select(-1:-4) %>%
t() %>%
data.frame() %>%
mutate(pollen_count = as.numeric(X1) + as.numeric(X2) + as.numeric(X3) + as.numeric(X4)) %>%
select(pollen_count) %>%
mutate(date = seq(as.Date("2018-09-11"), as.Date("2019-03-11"), by = 1))
# Merge files together
merged = rbind(file4, file2, file1)
# Read in new data
old_data = read.csv("grassdata.csv", h = T) %>%
select(value, date) %>%
rename(pollen_count = value) %>% mutate(date = as.Date(date))
# Merge all into one file
new_data = rbind(old_data,merged)
ggplot(new_data, aes(x = date, y = pollen_count)) +
geom_line() +
theme_minimal()
write.csv(new_data, "all_counts.csv")
dev.off()
# plot(totalCombinedNoNAs$date, totalCombinedNoNAs$Minimum.Temperature, type = 'l')